# This notebook uses .tfrecord instead of just making a new dataset every time and saving it on memory. It speeds up the training time significantly. And it got me my top score in the competition

In [None]:
import numpy as np
import pandas as pd
import random
import os
import re
import json
from transformers import set_seed, BertTokenizer, TFBertForSequenceClassification, BertConfig
import tensorflow as tf
%pip install optuna
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
set_seed(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Install necessary packages for Azure SQL connection
%pip install mysql-connector-python 
%pip install PyMySQL

# Initialize TPU
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    tpu_cores = strategy.num_replicas_in_sync
    print(f"TPU cores available: {tpu_cores}")
except ValueError:
    print("TPU not found")
    raise SystemExit

# Set fixed batch size and learning rate parameters
base_learning_rate = 1e-5
batch_size_per_core = 32
tpu_cores = 8
batch_size = batch_size_per_core * tpu_cores
learning_rate = base_learning_rate * (batch_size / (batch_size_per_core * tpu_cores))

# Load the training data
train_data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')
kaggle_test_data = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Split the data into 75% training and 25% validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42, stratify=train_data['target'])

# Clean the text data
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)     # Remove mentions
    text = re.sub(r'\d+', '', text)      # Remove numbers
    text = re.sub(r'[^\w\s#]', '', text)  # Remove punctuation except hashtags
    text = text.lower()                  # Convert to lowercase
    return text

train_data['clean_text'] = train_data['text'].apply(clean_text)
val_data['clean_text'] = val_data['text'].apply(clean_text)
kaggle_test_data['clean_text'] = kaggle_test_data['text'].apply(clean_text)

# Tokenize the text data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=64,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )

# # Convert dataframes to TFRecord files
# def dataframe_to_tfrecord(dataframe, tfrecord_file, include_target=True):
#     with tf.io.TFRecordWriter(tfrecord_file) as writer:
#         for _, row in dataframe.iterrows():
#             feature = {
#                 'id': tf.train.Feature(int64_list=tf.train.Int64List(value=[row['id']])),
#                 'clean_text': tf.train.Feature(bytes_list=tf.train.BytesList(value=[row['clean_text'].encode('utf-8')])),
#             }
#             if include_target:
#                 feature['target'] = tf.train.Feature(int64_list=tf.train.Int64List(value=[row['target']]))
#             example = tf.train.Example(features=tf.train.Features(feature=feature))
#             writer.write(example.SerializeToString())

# dataframe_to_tfrecord(train_data[['id', 'clean_text', 'target']], 'train_data.tfrecord')
# dataframe_to_tfrecord(val_data[['id', 'clean_text', 'target']], 'val_data.tfrecord')
# dataframe_to_tfrecord(val_data[['id', 'clean_text', 'target']], 'fine_tune_data.tfrecord')
# dataframe_to_tfrecord(kaggle_test_data[['id', 'clean_text']], 'kaggle_test_data.tfrecord', include_target=False)

# Parse TFRecord files and create datasets
def parse_tfrecord_fn(example, include_target=True):
    feature_description = {
        'id': tf.io.FixedLenFeature([], tf.int64),
        'clean_text': tf.io.FixedLenFeature([], tf.string),
    }
    if include_target:
        feature_description['target'] = tf.io.FixedLenFeature([], tf.int64)
    
    example = tf.io.parse_single_example(example, feature_description)
    example['clean_text'] = tf.strings.reduce_join(example['clean_text'])
    
    if include_target:
        return example['clean_text'], example['target']
    return example['clean_text']

def load_tfrecord_dataset(file_pattern, batch_size, include_target=True, repeat=True):
    files = tf.data.Dataset.list_files(file_pattern)
    dataset = files.interleave(tf.data.TFRecordDataset, cycle_length=4)
    dataset = dataset.map(lambda x: parse_tfrecord_fn(x, include_target), num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    if repeat:
        dataset = dataset.repeat()
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

train_tfrecord_dataset = load_tfrecord_dataset('/kaggle/input/tfrecord-disaster/train_data.tfrecord', batch_size)
val_tfrecord_dataset = load_tfrecord_dataset('/kaggle/input/tfrecord-disaster/val_data.tfrecord', batch_size)
fine_tune_tfrecord_dataset = load_tfrecord_dataset('/kaggle/input/tfrecord-disaster/fine_tune_data.tfrecord', batch_size)
kaggle_test_tfrecord_dataset = load_tfrecord_dataset('/kaggle/input/tfrecord-disaster/kaggle_test_data.tfrecord', batch_size, include_target=False, repeat=False)  # No repeat for test dataset

# Tokenize datasets
def tokenize_tfrecord_dataset(dataset, include_target=True):
    def tokenize_fn(text, target=None):
        encodings = tokenizer(
            [str(t, 'utf-8') for t in text.numpy()],
            max_length=64,
            padding='max_length',
            truncation=True,
            return_tensors='tf'
        )
        if include_target:
            return encodings['input_ids'], target
        return encodings['input_ids']

    def map_fn(text, target=None):
        if include_target:
            input_ids, target = tf.py_function(tokenize_fn, [text, target], [tf.int32, tf.int64])
            input_ids.set_shape([None, 64])
            target.set_shape([None])
            return {'input_ids': input_ids}, target
        input_ids = tf.py_function(tokenize_fn, [text], tf.int32)
        input_ids.set_shape([None, 64])
        return {'input_ids': input_ids}

    if include_target:
        return dataset.map(map_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return dataset.map(map_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

train_tfrecord_dataset = tokenize_tfrecord_dataset(train_tfrecord_dataset)
val_tfrecord_dataset = tokenize_tfrecord_dataset(val_tfrecord_dataset)
fine_tune_tfrecord_dataset = tokenize_tfrecord_dataset(fine_tune_tfrecord_dataset)
kaggle_test_tfrecord_dataset = tokenize_tfrecord_dataset(kaggle_test_tfrecord_dataset, include_target=False)

# Directory to save models
model_save_dir = './saved_models'
os.makedirs(model_save_dir, exist_ok=True)

# File to store top 5 model predictions
top_predictions_file = 'top_5_predictions.json'

# Load existing top 5 predictions
if os.path.exists(top_predictions_file):
    with open(top_predictions_file, 'r') as file:
        top_predictions = json.load(file)
else:
    top_predictions = []

# Function to save top predictions
def save_top_predictions(pre_fine_tuning_file, post_fine_tuning_file, val_accuracy, model_number):
    global top_predictions

    new_entry = {
        'model_number': model_number,
        'val_accuracy': val_accuracy,
        'pre_fine_tuning_file': pre_fine_tuning_file,
        'post_fine_tuning_file': post_fine_tuning_file
    }

    # Add the new entry and sort by validation accuracy
    top_predictions.append(new_entry)
    top_predictions = sorted(top_predictions, key=lambda x: x['val_accuracy'], reverse=True)

    # If there are more than 5 entries, remove the one with the lowest accuracy
    if len(top_predictions) > 5:
        removed_entry = top_predictions.pop()
        # Check if the files exist before attempting to remove them
        if os.path.exists(removed_entry['pre_fine_tuning_file']):
            os.remove(removed_entry['pre_fine_tuning_file'])
            print(f"File {removed_entry['pre_fine_tuning_file']} has been removed.")
        else:
            print(f"File {removed_entry['pre_fine_tuning_file']} does not exist and cannot be removed.")
        
        if os.path.exists(removed_entry['post_fine_tuning_file']):
            os.remove(removed_entry['post_fine_tuning_file'])
            print(f"File {removed_entry['post_fine_tuning_file']} has been removed.")
        else:
            print(f"File {removed_entry['post_fine_tuning_file']} does not exist and cannot be removed.")

    # Save the updated top predictions to file
    with open(top_predictions_file, 'w') as file:
        json.dump(top_predictions, file, indent=4)


# Define steps per epoch and validation steps
steps_per_epoch = len(train_data) // batch_size
validation_steps = len(val_data) // batch_size
fine_tune_steps_per_epoch = len(val_data) // batch_size  # Added step calculation for fine-tuning dataset

def objective(trial):
    num_epochs = trial.suggest_int("num_epochs", 1, 20)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    lr_scheduler_type = trial.suggest_categorical("lr_scheduler_type", ["constant", "linear", "cosine", "cosine_with_restarts"])
    gradient_clip_norm = trial.suggest_float("gradient_clip_norm", 0.0, 1.0)

    with strategy.scope():
        precision = tf.keras.metrics.Precision()
        recall = tf.keras.metrics.Recall()

        def f1_score_custom(y_true, y_pred):
            y_pred = tf.argmax(y_pred, axis=1)
            y_true = tf.cast(y_true, tf.int64)
            precision.update_state(y_true, y_pred)
            recall.update_state(y_true, y_pred)
            precision_result = precision.result()
            recall_result = recall.result()
            f1 = 2 * ((precision_result * recall_result) / (precision_result + recall_result + tf.keras.backend.epsilon()))
            return f1

        config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2, hidden_dropout_prob=dropout_rate)
        model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

        if lr_scheduler_type == "linear":
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000,
                end_learning_rate=0.0,
                power=1.0
            )
        elif lr_scheduler_type == "cosine":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000
            )
        elif lr_scheduler_type == "cosine_with_restarts":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                initial_learning_rate=learning_rate,
                first_decay_steps=1000
            )
        else:
            lr_schedule = learning_rate

        optimizer = tf.keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            epsilon=1e-8,
            clipnorm=gradient_clip_norm
        )

        model.compile(
            optimizer=optimizer, 
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy'), f1_score_custom],
            steps_per_execution=1
        )

    model.fit(train_tfrecord_dataset, epochs=num_epochs, validation_data=val_tfrecord_dataset, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, verbose=1)

    val_loss, val_accuracy, val_f1_score = model.evaluate(val_tfrecord_dataset, steps=validation_steps, verbose=1)
    print(f"f1 score: {val_f1_score} and accuracy: {val_accuracy}")
    
    avg_score = (val_accuracy + val_f1_score) / 2

    if len(top_predictions) < 5 or val_accuracy > min(top_predictions, key=lambda x: x['val_accuracy'])['val_accuracy']:
        kaggle_test_predictions = model.predict(kaggle_test_tfrecord_dataset, steps=(len(kaggle_test_data) + batch_size - 1) // batch_size).logits
        
        # Ensure the prediction length matches the test data length
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()[:len(kaggle_test_data)]
        
        pre_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_pre_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(pre_fine_tuning_predictions_file, index=False)

        training_data_size = len(train_data)
        fine_tune_data_size = len(val_data)
        fine_tune_epochs = max(1, round((fine_tune_data_size / training_data_size) * num_epochs))

        model.fit(fine_tune_tfrecord_dataset, epochs=fine_tune_epochs, steps_per_epoch=fine_tune_steps_per_epoch, verbose=1)  # Added steps_per_epoch

        kaggle_test_predictions = model.predict(kaggle_test_tfrecord_dataset, steps=(len(kaggle_test_data) + batch_size - 1) // batch_size).logits
        
        # Ensure the prediction length matches the test data length
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()[:len(kaggle_test_data)]
        
        post_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_post_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(post_fine_tuning_predictions_file, index=False)

        save_top_predictions(pre_fine_tuning_predictions_file, post_fine_tuning_predictions_file, val_accuracy, trial.number)

    return val_accuracy


# Define your Optuna study, using the MySQL connection string
optuna_storage = 'mysql+pymysql://<username>:<password>@<host>/<database>?ssl_ca=<path_to_CA_cert>&ssl_verify_cert=true'

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
db_password = user_secrets.get_secret("DB_PASSWORD")

optuna_storage = f'mysql+pymysql://MichaelAzure:{db_password}@kaggle-third-sql.mysql.database.azure.com/kaggle_disaster_database?ssl_ca=/kaggle/input/certification&ssl_verify_cert=true'

studyName = 'disaster_tfrecord_BERT_1'
study = optuna.create_study(study_name=studyName,
                            storage=optuna_storage,
                            direction='maximize',
                            load_if_exists=True)

study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

output_top_predictions_file = '/kaggle/working/top_5_predictions.json'
with open(output_top_predictions_file, 'w') as file:
    json.dump(top_predictions, file, indent=4)

print(f"Top 5 predictions saved to {output_top_predictions_file}")


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1719415259.147937   18190 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="localhost"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:479
D0626 15:20:59.156627411   18190 config.cc:196]                        gRPC EXPERIMENT call_status_override_on_cancellation   OFF (default:OFF)
D0626 15:20:59.156642102   18190 config.cc:196]                        gRPC EXPERIMENT call_v3                                OFF (default:OFF)
D0626 15:20:59.156645590   18190 config.cc:196]                        gRPC EXPERIMENT canary_client_privacy                  ON  (default:ON)
D0626 15:20:59.156648202   18190 config.cc:196]                        gRPC EXPERIMENT capture_base_context                   ON  (default:ON)
D0626 15:20:59.156650774   18190 config.cc:196]                        gRPC EXPERI

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
INFO:tensorflow:Deallocate tpu buffer

I0000 00:00:1719415275.745821   18190 service.cc:145] XLA service 0x5815d524a8c0 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1719415275.745878   18190 service.cc:153]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1719415275.745883   18190 service.cc:153]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1719415275.745886   18190 service.cc:153]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1719415275.745895   18190 service.cc:153]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1719415275.745898   18190 service.cc:153]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1719415275.745900   18190 service.cc:153]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1719415275.745903   18190 service.cc:153]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1719415275.745906   18190 service.cc:153]   StreamExecutor device (7): TPU, 2a886c8


INFO:tensorflow:Finished initializing TPU system.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:5, TPU, 0, 0)
I

[I 2024-06-26 15:21:22,439] Using an existing study with name 'disaster_tfrecord_BERT_1' instead of creating a new one.
I0000 00:00:1719415285.191598   18190 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/16
Cause: for/else statement not yet supported


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported


2024-06-26 15:22:57.788143: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719415381.154449   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7942c47e559dd746:0:0), session_name()
I0000 00:00:1719415427.291274   19021 tpu_compile_op_common.cc:245] Compilation of 7942c47e559dd746:0:0 with session name  took 46.136779569s and succeeded
I0000 00:00:1719415427.392554   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7942c47e559dd746:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16066203469467049059", property.function_library_fingerprint = 7755191795025204138, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 15:24:03.089226: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719415443.860343   18999 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(428d1f7f8d097db3:0:0), session_name()
I0000 00:00:1719415448.918296   18999 tpu_compile_op_common.cc:245] Compilation of 428d1f7f8d097db3:0:0 with session name  took 5.057906135s and succeeded
I0000 00:00:1719415448.947986   18999 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(428d1f7f8d097db3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_18426024815014453821", property.function_library_fingerprint = 16613637223153925243, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/16


I0000 00:00:1719415449.735343   19019 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(16a8377b4fdd8cb3:0:0), session_name()
I0000 00:00:1719415491.230060   19019 tpu_compile_op_common.cc:245] Compilation of 16a8377b4fdd8cb3:0:0 with session name  took 41.494674174s and succeeded
I0000 00:00:1719415491.333431   19019 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(16a8377b4fdd8cb3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16066203469467049059", property.function_library_fingerprint = 7755191795025204138, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
f1 score: 0.7764623761177063 and accuracy: 0.83203125


2024-06-26 15:26:07.028724: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719415567.716371   18979 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d44012ab4d84dacd:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719415572.631502   18979 tpu_compile_op_common.cc:245] Compilation of d44012ab4d84dacd:0:0 with session name  took 4.915085681s and succeeded
I0000 00:00:1719415572.655796   18979 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d44012ab4d84dacd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_129789326858730406", property.function_library_fingerprint = 2620397898271371846, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719415572.655842   18979 tpu_compilation_cache_interface.cc:541] After adding entry for key d44012ab4d84dacd:0



I0000 00:00:1719415573.821764   18973 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4eb37bfd23837492:0:0), session_name()




I0000 00:00:1719415578.441293   18973 tpu_compile_op_common.cc:245] Compilation of 4eb37bfd23837492:0:0 with session name  took 4.619485696s and succeeded
I0000 00:00:1719415578.463409   18973 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4eb37bfd23837492:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_129789326858730406", property.function_library_fingerprint = 2620397898271371846, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719415578.463448   18973 tpu_compilation_cache_interface.cc:541] After adding entry for key 4eb37bfd23837492:0

Epoch 1/5
Epoch 2/5


I0000 00:00:1719415580.311630   19014 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cb066c13950fcc17:0:0), session_name()
I0000 00:00:1719415623.390212   19014 tpu_compile_op_common.cc:245] Compilation of cb066c13950fcc17:0:0 with session name  took 43.078543773s and succeeded
I0000 00:00:1719415623.495117   19014 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cb066c13950fcc17:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16066203469467049059", property.function_library_fingerprint = 7755191795025204138, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-06-26 15:27:10,093] Trial 704 finished with value: 0.83203125 and parameters: {'num_epochs': 16, 'dropout_rate': 0.20908963980599965, 'weight_decay': 0.03586932902981408, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7080978625262163}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 15:28:25.994675: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719415709.284292   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(75d2173ad350d6b2:0:0), session_name()
I0000 00:00:1719415754.981720   19021 tpu_compile_op_common.cc:245] Compilation of 75d2173ad350d6b2:0:0 with session name  took 45.697350699s and succeeded
I0000 00:00:1719415755.108434   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75d2173ad350d6b2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9872901417910858362", property.function_library_fingerprint = 12386924205628269299, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 15:29:29.376280: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719415770.060835   18979 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a702224f95a46a0c:0:0), session_name()
I0000 00:00:1719415775.418281   18979 tpu_compile_op_common.cc:245] Compilation of a702224f95a46a0c:0:0 with session name  took 5.357399068s and succeeded
I0000 00:00:1719415775.446774   18979 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a702224f95a46a0c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5158625469105371417", property.function_library_fingerprint = 9718517583011807971, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/10


I0000 00:00:1719415776.196148   19026 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(693523f255d2c53d:0:0), session_name()
I0000 00:00:1719415818.341451   19026 tpu_compile_op_common.cc:245] Compilation of 693523f255d2c53d:0:0 with session name  took 42.145266627s and succeeded
I0000 00:00:1719415818.440382   19026 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(693523f255d2c53d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9872901417910858362", property.function_library_fingerprint = 12386924205628269299, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7671300172805786 and accuracy: 0.8370535969734192


2024-06-26 15:31:07.911098: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719415868.643229   19007 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2542ee8f5c63293:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719415873.747566   19007 tpu_compile_op_common.cc:245] Compilation of 2542ee8f5c63293:0:0 with session name  took 5.104267596s and succeeded
I0000 00:00:1719415873.779807   19007 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2542ee8f5c63293:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4176351568292491462", property.function_library_fingerprint = 2425895975492924101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719415873.779845   19007 tpu_compilation_cache_interface.cc:541] After adding entry for key 2542ee8f5c63293:0:0



I0000 00:00:1719415874.965361   19015 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f391844920876e55:0:0), session_name()




I0000 00:00:1719415879.422960   19015 tpu_compile_op_common.cc:245] Compilation of f391844920876e55:0:0 with session name  took 4.457555226s and succeeded
I0000 00:00:1719415879.444690   19015 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f391844920876e55:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4176351568292491462", property.function_library_fingerprint = 2425895975492924101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719415879.444724   19015 tpu_compilation_cache_interface.cc:541] After adding entry for key f391844920876e55:

Epoch 1/3
Epoch 2/3


I0000 00:00:1719415881.245557   19015 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4e0d5f693c5dbeb8:0:0), session_name()
I0000 00:00:1719415924.656525   19015 tpu_compile_op_common.cc:245] Compilation of 4e0d5f693c5dbeb8:0:0 with session name  took 43.410931008s and succeeded
I0000 00:00:1719415924.756894   19015 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e0d5f693c5dbeb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9872901417910858362", property.function_library_fingerprint = 12386924205628269299, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/3


[I 2024-06-26 15:32:09,639] Trial 715 finished with value: 0.8370535969734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.19848425703201192, 'weight_decay': 0.024025262319969026, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7927169730097947}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 15:33:25.755980: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719416009.080393   18937 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b11a0ebb90932606:0:0), session_name()
I0000 00:00:1719416054.691726   18937 tpu_compile_op_common.cc:245] Compilation of b11a0ebb90932606:0:0 with session name  took 45.611284948s and succeeded
I0000 00:00:1719416054.821764   18937 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b11a0ebb90932606:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18191064196365457015", property.function_library_fingerprint = 12369531137579866654, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 15:34:28.642613: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719416069.376129   18969 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2664b0214b6ef6ea:0:0), session_name()
I0000 00:00:1719416074.843630   18969 tpu_compile_op_common.cc:245] Compilation of 2664b0214b6ef6ea:0:0 with session name  took 5.467459545s and succeeded
I0000 00:00:1719416074.875332   18969 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2664b0214b6ef6ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5609792382518738999", property.function_library_fingerprint = 14315799197904755081, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719416075.627307   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d4df2af61e613749:0:0), session_name()
I0000 00:00:1719416118.639468   18987 tpu_compile_op_common.cc:245] Compilation of d4df2af61e613749:0:0 with session name  took 43.012111137s and succeeded
I0000 00:00:1719416118.763276   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d4df2af61e613749:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18191064196365457015", property.function_library_fingerprint = 12369531137579866654, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7609356641769409 and accuracy: 0.8454241156578064


2024-06-26 15:36:04.487226: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719416165.179687   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f6a6e3ada74466cc:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719416170.326067   19011 tpu_compile_op_common.cc:245] Compilation of f6a6e3ada74466cc:0:0 with session name  took 5.146327705s and succeeded
I0000 00:00:1719416170.361077   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f6a6e3ada74466cc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5334562433020182432", property.function_library_fingerprint = 17378307756547019020, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416170.361117   19011 tpu_compilation_cache_interface.cc:541] After adding entry for key f6a6e3ada74466cc



I0000 00:00:1719416171.528501   19015 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(98e84cc33c6fc0f1:0:0), session_name()




I0000 00:00:1719416176.349724   19015 tpu_compile_op_common.cc:245] Compilation of 98e84cc33c6fc0f1:0:0 with session name  took 4.821176121s and succeeded
I0000 00:00:1719416176.384617   19015 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(98e84cc33c6fc0f1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5334562433020182432", property.function_library_fingerprint = 17378307756547019020, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416176.384679   19015 tpu_compilation_cache_interface.cc:541] After adding entry for key 98e84cc33c6fc0f1

Epoch 1/3
Epoch 2/3


I0000 00:00:1719416178.155393   18960 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec50cd5d3b34ebc4:0:0), session_name()
I0000 00:00:1719416223.255503   18960 tpu_compile_op_common.cc:245] Compilation of ec50cd5d3b34ebc4:0:0 with session name  took 45.100069181s and succeeded
I0000 00:00:1719416223.363821   18960 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec50cd5d3b34ebc4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18191064196365457015", property.function_library_fingerprint = 12369531137579866654, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/3


[I 2024-06-26 15:37:08,180] Trial 724 finished with value: 0.8454241156578064 and parameters: {'num_epochs': 9, 'dropout_rate': 0.19332168615900372, 'weight_decay': 0.019766324771581422, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8389751959089359}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 15:38:22.817480: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719416306.332216   19022 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b435c67b9a432b50:0:0), session_name()
I0000 00:00:1719416352.992549   19022 tpu_compile_op_common.cc:245] Compilation of b435c67b9a432b50:0:0 with session name  took 46.660270418s and succeeded
I0000 00:00:1719416353.119236   19022 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b435c67b9a432b50:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6909083954615316599", property.function_library_fingerprint = 5939593246660975079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 15:39:27.411980: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719416368.108533   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5c72ce00b865bf29:0:0), session_name()
I0000 00:00:1719416373.875927   19001 tpu_compile_op_common.cc:245] Compilation of 5c72ce00b865bf29:0:0 with session name  took 5.76733839s and succeeded
I0000 00:00:1719416373.917966   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c72ce00b865bf29:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4059291637607523325", property.function_library_fingerprint = 17607076259423061057, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/12


I0000 00:00:1719416374.657295   18968 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bf98590cca8f3999:0:0), session_name()
I0000 00:00:1719416418.129983   18968 tpu_compile_op_common.cc:245] Compilation of bf98590cca8f3999:0:0 with session name  took 43.472644753s and succeeded
I0000 00:00:1719416418.236448   18968 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf98590cca8f3999:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6909083954615316599", property.function_library_fingerprint = 5939593246660975079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7957697510719299 and accuracy: 0.8448660969734192


2024-06-26 15:41:15.989101: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719416476.667888   18976 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39b7341709c34024:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719416481.891000   18976 tpu_compile_op_common.cc:245] Compilation of 39b7341709c34024:0:0 with session name  took 5.223074338s and succeeded
I0000 00:00:1719416481.918004   18976 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39b7341709c34024:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15342262978532126745", property.function_library_fingerprint = 486395281719864172, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416481.918040   18976 tpu_compilation_cache_interface.cc:541] After adding entry for key 39b7341709c34024:



I0000 00:00:1719416483.057628   18938 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c068d286e7b985dc:0:0), session_name()




I0000 00:00:1719416487.792365   18938 tpu_compile_op_common.cc:245] Compilation of c068d286e7b985dc:0:0 with session name  took 4.734700984s and succeeded
I0000 00:00:1719416487.814507   18938 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c068d286e7b985dc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15342262978532126745", property.function_library_fingerprint = 486395281719864172, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416487.814547   18938 tpu_compilation_cache_interface.cc:541] After adding entry for key c068d286e7b985dc:

Epoch 1/4
Epoch 2/4


I0000 00:00:1719416489.554528   18964 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(38d30cd58a37d3d9:0:0), session_name()
I0000 00:00:1719416534.246890   18964 tpu_compile_op_common.cc:245] Compilation of 38d30cd58a37d3d9:0:0 with session name  took 44.692302831s and succeeded
I0000 00:00:1719416534.347937   18964 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(38d30cd58a37d3d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6909083954615316599", property.function_library_fingerprint = 5939593246660975079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/4
Epoch 4/4


[I 2024-06-26 15:42:20,953] Trial 734 finished with value: 0.8448660969734192 and parameters: {'num_epochs': 12, 'dropout_rate': 0.1797500630554275, 'weight_decay': 0.028569827018072737, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6917229850362319}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/15


2024-06-26 15:43:36.720775: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719416620.250532   19024 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(610b4d156255348b:0:0), session_name()
I0000 00:00:1719416666.329016   19024 tpu_compile_op_common.cc:245] Compilation of 610b4d156255348b:0:0 with session name  took 46.078440138s and succeeded
I0000 00:00:1719416666.431749   19024 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(610b4d156255348b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9599203627139499561", property.function_library_fingerprint = 14213707936824794216, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 15:44:40.533079: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719416681.246154   19013 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(72cecabc78cd5b1b:0:0), session_name()
I0000 00:00:1719416686.891049   19013 tpu_compile_op_common.cc:245] Compilation of 72cecabc78cd5b1b:0:0 with session name  took 5.64484391s and succeeded
I0000 00:00:1719416686.930797   19013 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(72cecabc78cd5b1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6297496083003725274", property.function_library_fingerprint = 14125469863560265990, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/15


I0000 00:00:1719416687.667144   18978 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(50cf933e62ac1f8c:0:0), session_name()
I0000 00:00:1719416731.171737   18978 tpu_compile_op_common.cc:245] Compilation of 50cf933e62ac1f8c:0:0 with session name  took 43.504551249s and succeeded
I0000 00:00:1719416731.298820   18978 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50cf933e62ac1f8c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9599203627139499561", property.function_library_fingerprint = 14213707936824794216, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
f1 score: 0.7775144577026367 and accuracy: 0.8415178656578064


2024-06-26 15:46:42.400816: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719416803.133284   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3a2322d6fb067dd2:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719416808.574817   18940 tpu_compile_op_common.cc:245] Compilation of 3a2322d6fb067dd2:0:0 with session name  took 5.441480589s and succeeded
I0000 00:00:1719416808.611416   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3a2322d6fb067dd2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5299701035229018686", property.function_library_fingerprint = 9588801974083634952, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416808.611459   18940 tpu_compilation_cache_interface.cc:541] After adding entry for key 3a2322d6fb067dd2:



I0000 00:00:1719416809.786401   18991 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b04373599c0fd5ff:0:0), session_name()




I0000 00:00:1719416814.731932   18991 tpu_compile_op_common.cc:245] Compilation of b04373599c0fd5ff:0:0 with session name  took 4.945484098s and succeeded
I0000 00:00:1719416814.759303   18991 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b04373599c0fd5ff:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5299701035229018686", property.function_library_fingerprint = 9588801974083634952, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719416814.759335   18991 tpu_compilation_cache_interface.cc:541] After adding entry for key b04373599c0fd5ff:

Epoch 1/5
Epoch 2/5


I0000 00:00:1719416816.501498   18962 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fbb3e1618bc490f9:0:0), session_name()
I0000 00:00:1719416861.568168   18962 tpu_compile_op_common.cc:245] Compilation of fbb3e1618bc490f9:0:0 with session name  took 45.066617668s and succeeded
I0000 00:00:1719416861.674541   18962 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fbb3e1618bc490f9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9599203627139499561", property.function_library_fingerprint = 14213707936824794216, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-06-26 15:47:48,397] Trial 743 finished with value: 0.8415178656578064 and parameters: {'num_epochs': 15, 'dropout_rate': 0.3090417739634119, 'weight_decay': 0.017279268261056115, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.6305686461237141}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


2024-06-26 15:49:05.002612: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719416948.298476   18950 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fe3cc2ab14f8ad36:0:0), session_name()
I0000 00:00:1719416994.715682   18950 tpu_compile_op_common.cc:245] Compilation of fe3cc2ab14f8ad36:0:0 with session name  took 46.417161767s and succeeded
I0000 00:00:1719416994.823130   18950 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fe3cc2ab14f8ad36:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1898528834140682934", property.function_library_fingerprint = 1325194176612872713, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 15:50:08.719662: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719417009.424214   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(88aa645d02014bc7:0:0), session_name()
I0000 00:00:1719417015.081372   18952 tpu_compile_op_common.cc:245] Compilation of 88aa645d02014bc7:0:0 with session name  took 5.657105237s and succeeded
I0000 00:00:1719417015.113509   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(88aa645d02014bc7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_1965537058266991385", property.function_library_fingerprint = 14071197819034308188, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/13


I0000 00:00:1719417015.882593   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ef8378ebf9853004:0:0), session_name()
I0000 00:00:1719417060.012105   19008 tpu_compile_op_common.cc:245] Compilation of ef8378ebf9853004:0:0 with session name  took 44.129471286s and succeeded
I0000 00:00:1719417060.137713   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef8378ebf9853004:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1898528834140682934", property.function_library_fingerprint = 1325194176612872713, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7699845433235168 and accuracy: 0.8303571343421936


[I 2024-06-26 15:51:54,904] Trial 751 finished with value: 0.8303571343421936 and parameters: {'num_epochs': 13, 'dropout_rate': 0.28418222752843225, 'weight_decay': 0.023082552917762428, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.03109266487205209}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 15:53:11.054991: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719417194.511081   18962 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e3580d4c192406ae:0:0), session_name()
I0000 00:00:1719417240.107570   18962 tpu_compile_op_common.cc:245] Compilation of e3580d4c192406ae:0:0 with session name  took 45.596432368s and succeeded
I0000 00:00:1719417240.214968   18962 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3580d4c192406ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6852460935932473638", property.function_library_fingerprint = 6859350256306727221, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 15:54:14.014213: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719417254.735608   19023 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cae601ed4845830:0:0), session_name()
I0000 00:00:1719417260.278095   19023 tpu_compile_op_common.cc:245] Compilation of cae601ed4845830:0:0 with session name  took 5.542446039s and succeeded
I0000 00:00:1719417260.315002   19023 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cae601ed4845830:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_14642501584191010803", property.function_library_fingerprint = 8994308938602295370, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/8


I0000 00:00:1719417261.081450   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9fe2761f729a73db:0:0), session_name()
I0000 00:00:1719417304.305698   18985 tpu_compile_op_common.cc:245] Compilation of 9fe2761f729a73db:0:0 with session name  took 43.224208196s and succeeded
I0000 00:00:1719417304.407986   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9fe2761f729a73db:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6852460935932473638", property.function_library_fingerprint = 6859350256306727221, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7583605051040649 and accuracy: 0.8487723469734192


2024-06-26 15:55:44.705197: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719417345.382589   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c5791a60ed47637d:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719417350.572813   18947 tpu_compile_op_common.cc:245] Compilation of c5791a60ed47637d:0:0 with session name  took 5.190181369s and succeeded
I0000 00:00:1719417350.612425   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c5791a60ed47637d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_849969485248288923", property.function_library_fingerprint = 3387910117600623112, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719417350.612462   18947 tpu_compilation_cache_interface.cc:541] After adding entry for key c5791a60ed47637d:0



I0000 00:00:1719417351.774605   18955 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bcd5a459aea1b923:0:0), session_name()




I0000 00:00:1719417356.737844   18955 tpu_compile_op_common.cc:245] Compilation of bcd5a459aea1b923:0:0 with session name  took 4.963201271s and succeeded
I0000 00:00:1719417356.762350   18955 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bcd5a459aea1b923:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_849969485248288923", property.function_library_fingerprint = 3387910117600623112, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719417356.762385   18955 tpu_compilation_cache_interface.cc:541] After adding entry for key bcd5a459aea1b923:0

Epoch 1/3
Epoch 2/3


I0000 00:00:1719417358.528654   18955 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8295f35987824a3b:0:0), session_name()
I0000 00:00:1719417402.764402   18955 tpu_compile_op_common.cc:245] Compilation of 8295f35987824a3b:0:0 with session name  took 44.235704636s and succeeded
I0000 00:00:1719417402.903225   18955 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8295f35987824a3b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6852460935932473638", property.function_library_fingerprint = 6859350256306727221, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_704_accuracy_0.8320_avg_score_0.8042_f1_0.7765_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_704_accuracy_0.8320_avg_score_0.8042_f1_0.7765_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 15:56:47,659] Trial 759 finished with value: 0.8487723469734192 and parameters: {'num_epochs': 8, 'dropout_rate': 0.20167543819352332, 'weight_decay': 0.025608890844982263, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6614023506901382}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


2024-06-26 15:58:06.174833: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719417489.570541   18969 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c0394827a010afba:0:0), session_name()
I0000 00:00:1719417536.307486   18969 tpu_compile_op_common.cc:245] Compilation of c0394827a010afba:0:0 with session name  took 46.736904082s and succeeded
I0000 00:00:1719417536.412744   18969 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c0394827a010afba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14358239067829560279", property.function_library_fingerprint = 8795499270343475874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 15:59:11.208629: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719417551.962010   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e8109c7e4590605c:0:0), session_name()
I0000 00:00:1719417557.582013   18956 tpu_compile_op_common.cc:245] Compilation of e8109c7e4590605c:0:0 with session name  took 5.619957585s and succeeded
I0000 00:00:1719417557.622861   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e8109c7e4590605c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10883153797550071087", property.function_library_fingerprint = 7382290343153803998, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/7


I0000 00:00:1719417558.429285   18999 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a34877ced8ff78c2:0:0), session_name()
I0000 00:00:1719417601.981906   18999 tpu_compile_op_common.cc:245] Compilation of a34877ced8ff78c2:0:0 with session name  took 43.552582018s and succeeded
I0000 00:00:1719417602.083169   18999 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a34877ced8ff78c2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14358239067829560279", property.function_library_fingerprint = 8795499270343475874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7378398180007935 and accuracy: 0.7996651530265808


[I 2024-06-26 16:00:30,408] Trial 767 finished with value: 0.7996651530265808 and parameters: {'num_epochs': 7, 'dropout_rate': 0.2030025587965381, 'weight_decay': 0.024914281629071437, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7676995982343462}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


2024-06-26 16:01:47.868576: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719417711.260461   19000 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48a7af9afee7b603:0:0), session_name()
I0000 00:00:1719417757.741594   19000 tpu_compile_op_common.cc:245] Compilation of 48a7af9afee7b603:0:0 with session name  took 46.481083864s and succeeded
I0000 00:00:1719417757.875067   19000 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48a7af9afee7b603:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11138765454508084332", property.function_library_fingerprint = 17386399156738339168, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 16:02:51.921009: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719417772.658356   18958 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(65d0a053bbf30e0a:0:0), session_name()
I0000 00:00:1719417778.583671   18958 tpu_compile_op_common.cc:245] Compilation of 65d0a053bbf30e0a:0:0 with session name  took 5.925266215s and succeeded
I0000 00:00:1719417778.620842   18958 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(65d0a053bbf30e0a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8271529891031357622", property.function_library_fingerprint = 17419954519832095748, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/7


I0000 00:00:1719417779.355803   19003 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(251a096d64048bd4:0:0), session_name()
I0000 00:00:1719417823.227933   19003 tpu_compile_op_common.cc:245] Compilation of 251a096d64048bd4:0:0 with session name  took 43.872094746s and succeeded
I0000 00:00:1719417823.333952   19003 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(251a096d64048bd4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11138765454508084332", property.function_library_fingerprint = 17386399156738339168, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.6521316170692444 and accuracy: 0.79296875


[I 2024-06-26 16:04:11,827] Trial 774 finished with value: 0.79296875 and parameters: {'num_epochs': 7, 'dropout_rate': 0.3365140592534319, 'weight_decay': 0.026584310328105938, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7380550873135706}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 16:05:29.617309: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719417932.923964   19005 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dd3c5e355f3ad5d9:0:0), session_name()
I0000 00:00:1719417980.301705   19005 tpu_compile_op_common.cc:245] Compilation of dd3c5e355f3ad5d9:0:0 with session name  took 47.377688007s and succeeded
I0000 00:00:1719417980.436492   19005 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd3c5e355f3ad5d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17399438287128548518", property.function_library_fingerprint = 7351393571176776332, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:06:34.854469: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719417995.559481   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d741e84c0c0dcd7b:0:0), session_name()
I0000 00:00:1719418001.810457   18956 tpu_compile_op_common.cc:245] Compilation of d741e84c0c0dcd7b:0:0 with session name  took 6.250932501s and succeeded
I0000 00:00:1719418001.862663   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d741e84c0c0dcd7b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15743086455946447576", property.function_library_fingerprint = 11370213297677955066, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/8


I0000 00:00:1719418002.626026   18994 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(298db654ebdd5344:0:0), session_name()
I0000 00:00:1719418049.771118   18994 tpu_compile_op_common.cc:245] Compilation of 298db654ebdd5344:0:0 with session name  took 47.145046002s and succeeded
I0000 00:00:1719418049.875976   18994 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(298db654ebdd5344:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17399438287128548518", property.function_library_fingerprint = 7351393571176776332, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7133194208145142 and accuracy: 0.8314732313156128


[I 2024-06-26 16:08:03,680] Trial 782 finished with value: 0.8314732313156128 and parameters: {'num_epochs': 8, 'dropout_rate': 0.3708043588280749, 'weight_decay': 0.02170592584816418, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.802127346794286}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 16:09:21.248492: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719418164.775299   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3ff7d2e903b425b0:0:0), session_name()
I0000 00:00:1719418211.823526   18940 tpu_compile_op_common.cc:245] Compilation of 3ff7d2e903b425b0:0:0 with session name  took 47.048180979s and succeeded
I0000 00:00:1719418211.928445   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3ff7d2e903b425b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4068338401222672016", property.function_library_fingerprint = 7565397694954995253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 16:10:25.825806: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719418226.527278   19000 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9038b339d1078b09:0:0), session_name()
I0000 00:00:1719418232.368438   19000 tpu_compile_op_common.cc:245] Compilation of 9038b339d1078b09:0:0 with session name  took 5.841108809s and succeeded
I0000 00:00:1719418232.420352   19000 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9038b339d1078b09:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_9233323629989643120", property.function_library_fingerprint = 10169960171596358470, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719418233.165406   18949 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d2d30fb29edebeac:0:0), session_name()
I0000 00:00:1719418278.717095   18949 tpu_compile_op_common.cc:245] Compilation of d2d30fb29edebeac:0:0 with session name  took 45.551648921s and succeeded
I0000 00:00:1719418278.849309   18949 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2d30fb29edebeac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4068338401222672016", property.function_library_fingerprint = 7565397694954995253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.6801572442054749 and accuracy: 0.8454241156578064


2024-06-26 16:12:09.435776: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719418330.126384   19022 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(db059cbb1fd68d51:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719418335.575835   19022 tpu_compile_op_common.cc:245] Compilation of db059cbb1fd68d51:0:0 with session name  took 5.449404325s and succeeded
I0000 00:00:1719418335.608166   19022 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db059cbb1fd68d51:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14451535116021003634", property.function_library_fingerprint = 11674878901603354381, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719418335.608217   19022 tpu_compilation_cache_interface.cc:541] After adding entry for key db059cbb1fd68d5



I0000 00:00:1719418336.753451   18933 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aed908ed6c603095:0:0), session_name()




I0000 00:00:1719418341.806556   18933 tpu_compile_op_common.cc:245] Compilation of aed908ed6c603095:0:0 with session name  took 5.053053005s and succeeded
I0000 00:00:1719418341.829376   18933 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aed908ed6c603095:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14451535116021003634", property.function_library_fingerprint = 11674878901603354381, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719418341.829413   18933 tpu_compilation_cache_interface.cc:541] After adding entry for key aed908ed6c60309

Epoch 1/3
Epoch 2/3


I0000 00:00:1719418343.603000   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7756e68e88a0420b:0:0), session_name()
I0000 00:00:1719418390.219238   18987 tpu_compile_op_common.cc:245] Compilation of 7756e68e88a0420b:0:0 with session name  took 46.616191371s and succeeded
I0000 00:00:1719418390.327466   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7756e68e88a0420b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4068338401222672016", property.function_library_fingerprint = 7565397694954995253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_715_accuracy_0.8371_avg_score_0.8021_f1_0.7671_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_715_accuracy_0.8371_avg_score_0.8021_f1_0.7671_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 16:13:15,123] Trial 790 finished with value: 0.8454241156578064 and parameters: {'num_epochs': 10, 'dropout_rate': 0.30212652394101136, 'weight_decay': 0.01939630402661902, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.795626910298418}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 16:14:32.427101: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719418475.671064   18958 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c6a01c1a20369a61:0:0), session_name()
I0000 00:00:1719418521.946319   18958 tpu_compile_op_common.cc:245] Compilation of c6a01c1a20369a61:0:0 with session name  took 46.27520301s and succeeded
I0000 00:00:1719418522.075854   18958 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c6a01c1a20369a61:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9927984589757479187", property.function_library_fingerprint = 10703963519985048358, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 16:15:35.942107: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719418536.630413   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b22ae4de0cc1d8eb:0:0), session_name()
I0000 00:00:1719418542.530819   19001 tpu_compile_op_common.cc:245] Compilation of b22ae4de0cc1d8eb:0:0 with session name  took 5.900361714s and succeeded
I0000 00:00:1719418542.580309   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b22ae4de0cc1d8eb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_16744202077377805725", property.function_library_fingerprint = 4603645161846837760, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719418543.329124   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c7f61b2181d333ca:0:0), session_name()
I0000 00:00:1719418587.323753   19012 tpu_compile_op_common.cc:245] Compilation of c7f61b2181d333ca:0:0 with session name  took 43.994581937s and succeeded
I0000 00:00:1719418587.452905   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c7f61b2181d333ca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9927984589757479187", property.function_library_fingerprint = 10703963519985048358, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7829943895339966 and accuracy: 0.8470982313156128


2024-06-26 16:17:17.736710: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719418638.434329   19009 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(405d688bb5fff66c:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719418643.922461   19009 tpu_compile_op_common.cc:245] Compilation of 405d688bb5fff66c:0:0 with session name  took 5.488086868s and succeeded
I0000 00:00:1719418643.954367   19009 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(405d688bb5fff66c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4994957461564533375", property.function_library_fingerprint = 10108413506492708283, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719418643.954402   19009 tpu_compilation_cache_interface.cc:541] After adding entry for key 405d688bb5fff66c



I0000 00:00:1719418645.125854   18951 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(703bd22eb04cbc0a:0:0), session_name()




I0000 00:00:1719418650.146069   18951 tpu_compile_op_common.cc:245] Compilation of 703bd22eb04cbc0a:0:0 with session name  took 5.020180952s and succeeded
I0000 00:00:1719418650.170741   18951 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(703bd22eb04cbc0a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4994957461564533375", property.function_library_fingerprint = 10108413506492708283, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719418650.170788   18951 tpu_compilation_cache_interface.cc:541] After adding entry for key 703bd22eb04cbc0a

Epoch 1/3
Epoch 2/3


I0000 00:00:1719418652.033795   18978 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2cfdd4f7ccda18f2:0:0), session_name()
I0000 00:00:1719418698.062021   18978 tpu_compile_op_common.cc:245] Compilation of 2cfdd4f7ccda18f2:0:0 with session name  took 46.02815435s and succeeded
I0000 00:00:1719418698.169668   18978 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2cfdd4f7ccda18f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9927984589757479187", property.function_library_fingerprint = 10703963519985048358, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_743_accuracy_0.8415_avg_score_0.8095_f1_0.7775_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_743_accuracy_0.8415_avg_score_0.8095_f1_0.7775_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 16:18:23,018] Trial 799 finished with value: 0.8470982313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.17289121040195876, 'weight_decay': 0.04783096093244263, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8326624915135054}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 16:19:38.754799: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719418782.245310   18970 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(50b293123d3d11c8:0:0), session_name()
I0000 00:00:1719418828.518247   18970 tpu_compile_op_common.cc:245] Compilation of 50b293123d3d11c8:0:0 with session name  took 46.272887079s and succeeded
I0000 00:00:1719418828.623148   18970 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50b293123d3d11c8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4924057486556856903", property.function_library_fingerprint = 18167298992177148980, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:20:43.253054: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719418843.934910   18982 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cbf95b1382d8a7c7:0:0), session_name()
I0000 00:00:1719418849.660293   18982 tpu_compile_op_common.cc:245] Compilation of cbf95b1382d8a7c7:0:0 with session name  took 5.725343755s and succeeded
I0000 00:00:1719418849.695449   18982 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cbf95b1382d8a7c7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12748332115683436888", property.function_library_fingerprint = 5241736698971314418, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719418850.454993   19004 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(13f0b8e876907328:0:0), session_name()
I0000 00:00:1719418894.910649   19004 tpu_compile_op_common.cc:245] Compilation of 13f0b8e876907328:0:0 with session name  took 44.45561367s and succeeded
I0000 00:00:1719418895.039950   19004 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(13f0b8e876907328:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4924057486556856903", property.function_library_fingerprint = 18167298992177148980, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.6872454285621643 and accuracy: 0.8080357313156128


[I 2024-06-26 16:22:11,804] Trial 808 finished with value: 0.8080357313156128 and parameters: {'num_epochs': 9, 'dropout_rate': 0.3254912761058652, 'weight_decay': 0.04428094720944082, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.2032630430829826}. Best is trial 248 with value: 0.8543526530265808.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 16:23:29.118003: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719419012.484297   18976 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7c60f8b275370b70:0:0), session_name()
I0000 00:00:1719419060.693106   18976 tpu_compile_op_common.cc:245] Compilation of 7c60f8b275370b70:0:0 with session name  took 48.208762502s and succeeded
I0000 00:00:1719419060.826078   18976 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7c60f8b275370b70:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15666183884710004903", property.function_library_fingerprint = 11018305405171565277, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 16:24:34.631200: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719419075.305494   19016 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(17bb2683250d2570:0:0), session_name()
I0000 00:00:1719419081.189193   19016 tpu_compile_op_common.cc:245] Compilation of 17bb2683250d2570:0:0 with session name  took 5.883651539s and succeeded
I0000 00:00:1719419081.239039   19016 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(17bb2683250d2570:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8476932791420627979", property.function_library_fingerprint = 16357119155331754568, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719419081.985672   19009 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(103f3fdeee3f653a:0:0), session_name()
I0000 00:00:1719419126.071969   19009 tpu_compile_op_common.cc:245] Compilation of 103f3fdeee3f653a:0:0 with session name  took 44.086256404s and succeeded
I0000 00:00:1719419126.180207   19009 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(103f3fdeee3f653a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15666183884710004903", property.function_library_fingerprint = 11018305405171565277, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7867853045463562 and accuracy: 0.8426339030265808


[I 2024-06-26 16:26:07,271] Trial 814 finished with value: 0.8426339030265808 and parameters: {'num_epochs': 10, 'dropout_rate': 0.16252454498787494, 'weight_decay': 0.09512094050848362, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8587023540772748}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 16:27:25.508660: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719419248.887811   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd791f75d0ec3d39:0:0), session_name()
I0000 00:00:1719419296.265932   18959 tpu_compile_op_common.cc:245] Compilation of fd791f75d0ec3d39:0:0 with session name  took 47.378066821s and succeeded
I0000 00:00:1719419296.398184   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd791f75d0ec3d39:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8631581682642749439", property.function_library_fingerprint = 14732156359795101519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:28:30.737304: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719419311.440271   18944 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(689aa9d19c717a31:0:0), session_name()
I0000 00:00:1719419317.463960   18944 tpu_compile_op_common.cc:245] Compilation of 689aa9d19c717a31:0:0 with session name  took 6.023646898s and succeeded
I0000 00:00:1719419317.504733   18944 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(689aa9d19c717a31:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_14192392293640005825", property.function_library_fingerprint = 9208260244299865267, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719419318.225674   18962 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b56cc338ab0c704d:0:0), session_name()
I0000 00:00:1719419362.783957   18962 tpu_compile_op_common.cc:245] Compilation of b56cc338ab0c704d:0:0 with session name  took 44.558223962s and succeeded
I0000 00:00:1719419362.915676   18962 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b56cc338ab0c704d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8631581682642749439", property.function_library_fingerprint = 14732156359795101519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7462426424026489 and accuracy: 0.7784598469734192


[I 2024-06-26 16:30:04,237] Trial 822 finished with value: 0.7784598469734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.14710684219700274, 'weight_decay': 0.047114198809059114, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4238951704207429}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 16:31:20.312046: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719419483.618690   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc9127995f98e76:0:0), session_name()
I0000 00:00:1719419530.916764   18953 tpu_compile_op_common.cc:245] Compilation of cc9127995f98e76:0:0 with session name  took 47.298024958s and succeeded
I0000 00:00:1719419531.023931   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc9127995f98e76:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_54087235739093972", property.function_library_fingerprint = 9995785714082111748, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



2024-06-26 16:32:25.109569: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719419545.782635   18945 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8d9a2c50f986e617:0:0), session_name()
I0000 00:00:1719419551.782839   18945 tpu_compile_op_common.cc:245] Compilation of 8d9a2c50f986e617:0:0 with session name  took 6.000164751s and succeeded
I0000 00:00:1719419551.823053   18945 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8d9a2c50f986e617:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5352023472218422577", property.function_library_fingerprint = 14561425272953381191, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719419552.574512   18966 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d94b9b0a2b3eff26:0:0), session_name()
I0000 00:00:1719419596.704168   18966 tpu_compile_op_common.cc:245] Compilation of d94b9b0a2b3eff26:0:0 with session name  took 44.129618656s and succeeded
I0000 00:00:1719419596.815089   18966 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d94b9b0a2b3eff26:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_54087235739093972", property.function_library_fingerprint = 9995785714082111748, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7903923988342285 and accuracy: 0.84375


[I 2024-06-26 16:34:03,202] Trial 830 finished with value: 0.84375 and parameters: {'num_epochs': 11, 'dropout_rate': 0.1815725945464053, 'weight_decay': 0.04615610647532555, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4360570339783644}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 16:35:19.127684: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719419722.660776   18981 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4f74db24fd2040c1:0:0), session_name()
I0000 00:00:1719419770.168820   18981 tpu_compile_op_common.cc:245] Compilation of 4f74db24fd2040c1:0:0 with session name  took 47.508000106s and succeeded
I0000 00:00:1719419770.269972   18981 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4f74db24fd2040c1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_885403794861895863", property.function_library_fingerprint = 14152479249360220797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 16:36:24.261844: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719419784.958190   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(67e52a40f3d747c8:0:0), session_name()
I0000 00:00:1719419791.009340   18952 tpu_compile_op_common.cc:245] Compilation of 67e52a40f3d747c8:0:0 with session name  took 6.051102853s and succeeded
I0000 00:00:1719419791.053392   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(67e52a40f3d747c8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15841720108626737540", property.function_library_fingerprint = 11238409846421277731, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/9


I0000 00:00:1719419791.802779   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2f9ecd8f8dddcace:0:0), session_name()
I0000 00:00:1719419837.709930   18995 tpu_compile_op_common.cc:245] Compilation of 2f9ecd8f8dddcace:0:0 with session name  took 45.907107173s and succeeded
I0000 00:00:1719419837.844347   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2f9ecd8f8dddcace:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_885403794861895863", property.function_library_fingerprint = 14152479249360220797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7660755515098572 and accuracy: 0.8537946343421936


2024-06-26 16:38:04.300823: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719419885.010415   19002 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(31a7729ef62810ce:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719419890.676942   19002 tpu_compile_op_common.cc:245] Compilation of 31a7729ef62810ce:0:0 with session name  took 5.666456248s and succeeded
I0000 00:00:1719419890.712815   19002 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(31a7729ef62810ce:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_426522495356266486", property.function_library_fingerprint = 10211641420136166582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719419890.712877   19002 tpu_compilation_cache_interface.cc:541] After adding entry for key 31a7729ef62810ce:



I0000 00:00:1719419891.864111   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(998854f4fbb1d6be:0:0), session_name()




I0000 00:00:1719419897.055510   19011 tpu_compile_op_common.cc:245] Compilation of 998854f4fbb1d6be:0:0 with session name  took 5.191358271s and succeeded
I0000 00:00:1719419897.085618   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(998854f4fbb1d6be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_426522495356266486", property.function_library_fingerprint = 10211641420136166582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719419897.085654   19011 tpu_compilation_cache_interface.cc:541] After adding entry for key 998854f4fbb1d6be:

Epoch 1/3
Epoch 2/3


I0000 00:00:1719419898.873548   18957 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(efea9b05bd83fcac:0:0), session_name()
I0000 00:00:1719419943.964334   18957 tpu_compile_op_common.cc:245] Compilation of efea9b05bd83fcac:0:0 with session name  took 45.090744243s and succeeded
I0000 00:00:1719419944.064404   18957 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(efea9b05bd83fcac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_885403794861895863", property.function_library_fingerprint = 14152479249360220797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_734_accuracy_0.8449_avg_score_0.8203_f1_0.7958_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_734_accuracy_0.8449_avg_score_0.8203_f1_0.7958_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 16:39:08,927] Trial 838 finished with value: 0.8537946343421936 and parameters: {'num_epochs': 9, 'dropout_rate': 0.21687112294615143, 'weight_decay': 0.041974447261415755, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8257027419703267}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 16:40:26.892721: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719420030.345587   18943 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e698e167d08ce689:0:0), session_name()
I0000 00:00:1719420078.346283   18943 tpu_compile_op_common.cc:245] Compilation of e698e167d08ce689:0:0 with session name  took 48.000644124s and succeeded
I0000 00:00:1719420078.474214   18943 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e698e167d08ce689:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15952177238402945653", property.function_library_fingerprint = 8022035920279648712, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:41:32.912084: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719420093.588114   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3ed6a4782697587d:0:0), session_name()
I0000 00:00:1719420099.910322   18953 tpu_compile_op_common.cc:245] Compilation of 3ed6a4782697587d:0:0 with session name  took 6.322148073s and succeeded
I0000 00:00:1719420099.939173   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3ed6a4782697587d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7773629461224543237", property.function_library_fingerprint = 17019028230374339439, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719420100.664295   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(35fa36c330366662:0:0), session_name()
I0000 00:00:1719420146.197712   18953 tpu_compile_op_common.cc:245] Compilation of 35fa36c330366662:0:0 with session name  took 45.533373839s and succeeded
I0000 00:00:1719420146.299399   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(35fa36c330366662:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15952177238402945653", property.function_library_fingerprint = 8022035920279648712, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7073718309402466 and accuracy: 0.8448660969734192


[I 2024-06-26 16:43:04,250] Trial 847 finished with value: 0.8448660969734192 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22167522067177328, 'weight_decay': 0.042241300652411023, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.31461129422704115}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 16:44:19.641233: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719420263.255479   19005 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d1cbe984a7a934f5:0:0), session_name()
I0000 00:00:1719420312.584173   19005 tpu_compile_op_common.cc:245] Compilation of d1cbe984a7a934f5:0:0 with session name  took 49.328644646s and succeeded
I0000 00:00:1719420312.693794   19005 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d1cbe984a7a934f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_609046839838558354", property.function_library_fingerprint = 2083955655212337567, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 16:45:26.838756: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719420327.528957   19000 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fee90b6a049686d4:0:0), session_name()
I0000 00:00:1719420333.528306   19000 tpu_compile_op_common.cc:245] Compilation of fee90b6a049686d4:0:0 with session name  took 5.999295411s and succeeded
I0000 00:00:1719420333.580660   19000 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fee90b6a049686d4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_13520143319520037904", property.function_library_fingerprint = 11065901195883408749, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719420334.294785   18973 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1e3d78c5d900e45d:0:0), session_name()
I0000 00:00:1719420380.471692   18973 tpu_compile_op_common.cc:245] Compilation of 1e3d78c5d900e45d:0:0 with session name  took 46.176867971s and succeeded
I0000 00:00:1719420380.577097   18973 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1e3d78c5d900e45d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_609046839838558354", property.function_library_fingerprint = 2083955655212337567, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7721310257911682 and accuracy: 0.81640625


[I 2024-06-26 16:47:04,447] Trial 855 finished with value: 0.81640625 and parameters: {'num_epochs': 10, 'dropout_rate': 0.22977267337900553, 'weight_decay': 0.041711204280571006, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8070336563329078}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 16:48:22.203933: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719420505.703393   18961 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(90e258a19e395924:0:0), session_name()
I0000 00:00:1719420556.035170   18961 tpu_compile_op_common.cc:245] Compilation of 90e258a19e395924:0:0 with session name  took 50.331724923s and succeeded
I0000 00:00:1719420556.146006   18961 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(90e258a19e395924:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13451020975272094792", property.function_library_fingerprint = 8308486486053337024, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:49:31.160710: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719420571.857001   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(121be927fde330dc:0:0), session_name()
I0000 00:00:1719420578.152080   19012 tpu_compile_op_common.cc:245] Compilation of 121be927fde330dc:0:0 with session name  took 6.295021149s and succeeded
I0000 00:00:1719420578.207626   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(121be927fde330dc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12501663933250189005", property.function_library_fingerprint = 17975659868390990341, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/12


I0000 00:00:1719420578.949225   18974 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3381910fcc5ae732:0:0), session_name()
I0000 00:00:1719420625.730071   18974 tpu_compile_op_common.cc:245] Compilation of 3381910fcc5ae732:0:0 with session name  took 46.780787356s and succeeded
I0000 00:00:1719420625.839803   18974 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3381910fcc5ae732:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13451020975272094792", property.function_library_fingerprint = 8308486486053337024, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7893791198730469 and accuracy: 0.8537946343421936


2024-06-26 16:51:26.268352: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719420686.953349   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ef176a1bc138b2aa:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719420692.795655   18959 tpu_compile_op_common.cc:245] Compilation of ef176a1bc138b2aa:0:0 with session name  took 5.842259432s and succeeded
I0000 00:00:1719420692.829255   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef176a1bc138b2aa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8122771809461791517", property.function_library_fingerprint = 10796704344134879159, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719420692.829319   18959 tpu_compilation_cache_interface.cc:541] After adding entry for key ef176a1bc138b2aa



I0000 00:00:1719420693.995276   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4f54d087dc4bfb24:0:0), session_name()




I0000 00:00:1719420699.419641   18959 tpu_compile_op_common.cc:245] Compilation of 4f54d087dc4bfb24:0:0 with session name  took 5.424321949s and succeeded
I0000 00:00:1719420699.445036   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4f54d087dc4bfb24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8122771809461791517", property.function_library_fingerprint = 10796704344134879159, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719420699.445086   18959 tpu_compilation_cache_interface.cc:541] After adding entry for key 4f54d087dc4bfb24

Epoch 1/4
Epoch 2/4


I0000 00:00:1719420701.355374   18980 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(507bcebe8f41d499:0:0), session_name()
I0000 00:00:1719420748.509860   18980 tpu_compile_op_common.cc:245] Compilation of 507bcebe8f41d499:0:0 with session name  took 47.154449024s and succeeded
I0000 00:00:1719420748.618378   18980 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(507bcebe8f41d499:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13451020975272094792", property.function_library_fingerprint = 8308486486053337024, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_790_accuracy_0.8454_avg_score_0.7628_f1_0.6802_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_790_accuracy_0.8454_avg_score_0.7628_f1_0.6802_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 16:52:34,641] Trial 863 finished with value: 0.8537946343421936 and parameters: {'num_epochs': 12, 'dropout_rate': 0.20125077597202645, 'weight_decay': 0.05002057627679169, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7527823969978569}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 16:53:57.378591: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719420840.983481   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(98f19a61ed8f1cda:0:0), session_name()
I0000 00:00:1719420889.838508   19021 tpu_compile_op_common.cc:245] Compilation of 98f19a61ed8f1cda:0:0 with session name  took 48.854979422s and succeeded
I0000 00:00:1719420889.946123   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(98f19a61ed8f1cda:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2602549071917313855", property.function_library_fingerprint = 15854877707687927677, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 16:55:04.619225: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719420905.429439   18973 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a9a693c349969666:0:0), session_name()
I0000 00:00:1719420911.867886   18973 tpu_compile_op_common.cc:245] Compilation of a9a693c349969666:0:0 with session name  took 6.438395916s and succeeded
I0000 00:00:1719420911.922033   18973 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a9a693c349969666:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15575741763307093748", property.function_library_fingerprint = 7289550812068536104, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/12


I0000 00:00:1719420912.689525   18932 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(51e7becc2202680e:0:0), session_name()
I0000 00:00:1719420958.709208   18932 tpu_compile_op_common.cc:245] Compilation of 51e7becc2202680e:0:0 with session name  took 46.019637752s and succeeded
I0000 00:00:1719420958.845110   18932 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(51e7becc2202680e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2602549071917313855", property.function_library_fingerprint = 15854877707687927677, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7791837453842163 and accuracy: 0.8348214030265808


[I 2024-06-26 16:56:51,581] Trial 871 finished with value: 0.8348214030265808 and parameters: {'num_epochs': 12, 'dropout_rate': 0.20897419813790158, 'weight_decay': 0.04953264368794192, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7353136984371227}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 16:58:12.208689: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719421095.860172   18960 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(73da65a76ae9c288:0:0), session_name()
I0000 00:00:1719421143.898631   18960 tpu_compile_op_common.cc:245] Compilation of 73da65a76ae9c288:0:0 with session name  took 48.038411596s and succeeded
I0000 00:00:1719421144.012065   18960 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(73da65a76ae9c288:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12689608579629111653", property.function_library_fingerprint = 14292731064942646527, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 16:59:18.691207: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719421159.435623   18933 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(43cc8a479edc9c8f:0:0), session_name()
I0000 00:00:1719421165.937394   18933 tpu_compile_op_common.cc:245] Compilation of 43cc8a479edc9c8f:0:0 with session name  took 6.501719005s and succeeded
I0000 00:00:1719421165.975988   18933 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(43cc8a479edc9c8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3481827205497766138", property.function_library_fingerprint = 15974854460168387148, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/12


I0000 00:00:1719421166.834056   18989 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6fecd9078acaa937:0:0), session_name()
I0000 00:00:1719421213.433189   18989 tpu_compile_op_common.cc:245] Compilation of 6fecd9078acaa937:0:0 with session name  took 46.599084084s and succeeded
I0000 00:00:1719421213.563079   18989 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6fecd9078acaa937:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12689608579629111653", property.function_library_fingerprint = 14292731064942646527, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7963110208511353 and accuracy: 0.828125


[I 2024-06-26 17:01:05,904] Trial 879 finished with value: 0.828125 and parameters: {'num_epochs': 12, 'dropout_rate': 0.21339637685791982, 'weight_decay': 0.06021059508904579, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.22136492676248237}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 17:02:25.816394: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719421349.370248   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aae308d171f867e7:0:0), session_name()
I0000 00:00:1719421399.324748   19008 tpu_compile_op_common.cc:245] Compilation of aae308d171f867e7:0:0 with session name  took 49.954423981s and succeeded
I0000 00:00:1719421399.456720   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aae308d171f867e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1198695521281548595", property.function_library_fingerprint = 10616978956387214058, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:03:34.331481: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719421415.045279   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec5a93c60c144575:0:0), session_name()
I0000 00:00:1719421421.859097   19012 tpu_compile_op_common.cc:245] Compilation of ec5a93c60c144575:0:0 with session name  took 6.81376117s and succeeded
I0000 00:00:1719421421.911152   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec5a93c60c144575:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_11053554891427367446", property.function_library_fingerprint = 10137029724830838539, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719421422.654090   18951 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8c82c4fdbdfedfb3:0:0), session_name()
I0000 00:00:1719421469.617120   18951 tpu_compile_op_common.cc:245] Compilation of 8c82c4fdbdfedfb3:0:0 with session name  took 46.962988675s and succeeded
I0000 00:00:1719421469.748326   18951 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8c82c4fdbdfedfb3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1198695521281548595", property.function_library_fingerprint = 10616978956387214058, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7743464112281799 and accuracy: 0.8420758843421936


[I 2024-06-26 17:05:18,353] Trial 887 finished with value: 0.8420758843421936 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2437368032297042, 'weight_decay': 0.01863956836495318, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7002735821605274}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 17:06:39.290002: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719421602.839010   18972 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a4e67afe25b3e446:0:0), session_name()
I0000 00:00:1719421653.881319   18972 tpu_compile_op_common.cc:245] Compilation of a4e67afe25b3e446:0:0 with session name  took 51.042253738s and succeeded
I0000 00:00:1719421654.021756   18972 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a4e67afe25b3e446:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5668865621841045226", property.function_library_fingerprint = 17775873162264958872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:07:48.894230: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719421669.663718   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6010cf421331ed97:0:0), session_name()
I0000 00:00:1719421676.335007   18959 tpu_compile_op_common.cc:245] Compilation of 6010cf421331ed97:0:0 with session name  took 6.67122002s and succeeded
I0000 00:00:1719421676.389003   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6010cf421331ed97:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_18283799471810375166", property.function_library_fingerprint = 1083000262392741497, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/9


I0000 00:00:1719421677.145806   18986 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b9fb97a3339580c4:0:0), session_name()
I0000 00:00:1719421724.894159   18986 tpu_compile_op_common.cc:245] Compilation of b9fb97a3339580c4:0:0 with session name  took 47.748308708s and succeeded
I0000 00:00:1719421725.024174   18986 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b9fb97a3339580c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5668865621841045226", property.function_library_fingerprint = 17775873162264958872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7162229418754578 and accuracy: 0.8348214030265808


[I 2024-06-26 17:09:23,451] Trial 894 finished with value: 0.8348214030265808 and parameters: {'num_epochs': 9, 'dropout_rate': 0.313875716167674, 'weight_decay': 0.04021378965908478, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.7443865312035878}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 17:10:46.145043: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719421849.847313   18960 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(891df74433abc429:0:0), session_name()
I0000 00:00:1719421901.698805   18960 tpu_compile_op_common.cc:245] Compilation of 891df74433abc429:0:0 with session name  took 51.851421569s and succeeded
I0000 00:00:1719421901.818943   18960 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(891df74433abc429:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2211242371061182608", property.function_library_fingerprint = 14148445681363471744, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:11:56.422171: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719421917.104585   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e3efb44355defb47:0:0), session_name()
I0000 00:00:1719421923.950554   19011 tpu_compile_op_common.cc:245] Compilation of e3efb44355defb47:0:0 with session name  took 6.845926139s and succeeded
I0000 00:00:1719421923.996924   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3efb44355defb47:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_16131290673179264196", property.function_library_fingerprint = 1135185698053113095, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719421924.758845   18955 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f86061fff8ba232a:0:0), session_name()
I0000 00:00:1719421972.985755   18955 tpu_compile_op_common.cc:245] Compilation of f86061fff8ba232a:0:0 with session name  took 48.226856489s and succeeded
I0000 00:00:1719421973.103660   18955 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f86061fff8ba232a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2211242371061182608", property.function_library_fingerprint = 14148445681363471744, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7735684514045715 and accuracy: 0.8431919813156128


[I 2024-06-26 17:13:41,353] Trial 902 finished with value: 0.8431919813156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.238645480699397, 'weight_decay': 0.015626087976192526, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7882895167528239}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/17


2024-06-26 17:15:01.380243: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719422105.006681   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ebfc5a23dd614359:0:0), session_name()
I0000 00:00:1719422156.708821   18967 tpu_compile_op_common.cc:245] Compilation of ebfc5a23dd614359:0:0 with session name  took 51.702093379s and succeeded
I0000 00:00:1719422156.832220   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ebfc5a23dd614359:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6455395573811602614", property.function_library_fingerprint = 16390794798608582136, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:16:12.576027: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719422173.369006   18939 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc04d5de64bcd11c:0:0), session_name()
I0000 00:00:1719422180.125982   18939 tpu_compile_op_common.cc:245] Compilation of cc04d5de64bcd11c:0:0 with session name  took 6.756919327s and succeeded
I0000 00:00:1719422180.173370   18939 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc04d5de64bcd11c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10073283813191101191", property.function_library_fingerprint = 15629537670347323627, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/17


I0000 00:00:1719422180.955316   18966 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2e42f634bb7e8ac6:0:0), session_name()
I0000 00:00:1719422227.970658   18966 tpu_compile_op_common.cc:245] Compilation of 2e42f634bb7e8ac6:0:0 with session name  took 47.015295362s and succeeded
I0000 00:00:1719422228.107660   18966 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2e42f634bb7e8ac6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6455395573811602614", property.function_library_fingerprint = 16390794798608582136, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/17
Epoch 4/17
Epoch 5/17
Epoch 6/17
Epoch 7/17
Epoch 8/17
Epoch 9/17
Epoch 10/17
Epoch 11/17
Epoch 12/17
Epoch 13/17
Epoch 14/17
Epoch 15/17
Epoch 16/17
Epoch 17/17
f1 score: 0.7658852934837341 and accuracy: 0.8325892686843872


[I 2024-06-26 17:18:22,149] Trial 910 finished with value: 0.8325892686843872 and parameters: {'num_epochs': 17, 'dropout_rate': 0.35982456873787017, 'weight_decay': 0.022487666388654213, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6846906589844972}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


2024-06-26 17:19:47.119016: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719422390.751540   18984 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(29b8d6aef257f5e:0:0), session_name()
I0000 00:00:1719422440.991480   18984 tpu_compile_op_common.cc:245] Compilation of 29b8d6aef257f5e:0:0 with session name  took 50.239898132s and succeeded
I0000 00:00:1719422441.125859   18984 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29b8d6aef257f5e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6852929846789964765", property.function_library_fingerprint = 6551985100999923607, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



2024-06-26 17:20:55.566319: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719422456.288657   18975 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e87f046401cd4296:0:0), session_name()
I0000 00:00:1719422462.948416   18975 tpu_compile_op_common.cc:245] Compilation of e87f046401cd4296:0:0 with session name  took 6.65971131s and succeeded
I0000 00:00:1719422462.997571   18975 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e87f046401cd4296:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8911065991270711087", property.function_library_fingerprint = 3786055275093735270, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/13


I0000 00:00:1719422463.735480   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f4fa843522d2047e:0:0), session_name()
I0000 00:00:1719422513.791612   18995 tpu_compile_op_common.cc:245] Compilation of f4fa843522d2047e:0:0 with session name  took 50.056078133s and succeeded
I0000 00:00:1719422513.925586   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f4fa843522d2047e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6852929846789964765", property.function_library_fingerprint = 6551985100999923607, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7842164635658264 and accuracy: 0.8353794813156128


[I 2024-06-26 17:22:50,588] Trial 918 finished with value: 0.8353794813156128 and parameters: {'num_epochs': 13, 'dropout_rate': 0.20341706712255914, 'weight_decay': 0.0180796416554471, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7605529711241203}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


2024-06-26 17:24:11.997125: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719422655.722629   18933 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d962f9d650702d4f:0:0), session_name()
I0000 00:00:1719422707.039660   18933 tpu_compile_op_common.cc:245] Compilation of d962f9d650702d4f:0:0 with session name  took 51.316977801s and succeeded
I0000 00:00:1719422707.178665   18933 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d962f9d650702d4f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12823017795483243013", property.function_library_fingerprint = 12720237788776513170, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 17:25:21.750210: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719422722.489577   18948 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(effd9224883999c5:0:0), session_name()
I0000 00:00:1719422729.356275   18948 tpu_compile_op_common.cc:245] Compilation of effd9224883999c5:0:0 with session name  took 6.866661066s and succeeded
I0000 00:00:1719422729.403439   18948 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(effd9224883999c5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_9203360677086333965", property.function_library_fingerprint = 15608984374667487652, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/13


I0000 00:00:1719422730.157411   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(752a153a668ced6:0:0), session_name()
I0000 00:00:1719422777.951361   19008 tpu_compile_op_common.cc:245] Compilation of 752a153a668ced6:0:0 with session name  took 47.793894458s and succeeded
I0000 00:00:1719422778.080367   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(752a153a668ced6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12823017795483243013", property.function_library_fingerprint = 12720237788776513170, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7375469207763672 and accuracy: 0.8270089030265808


[I 2024-06-26 17:27:14,620] Trial 926 finished with value: 0.8270089030265808 and parameters: {'num_epochs': 13, 'dropout_rate': 0.37393350607507275, 'weight_decay': 0.025090433022253562, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3219871496000983}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


2024-06-26 17:28:38.408025: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719422922.111061   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9cf032f8b5a8717f:0:0), session_name()
I0000 00:00:1719422972.973764   18985 tpu_compile_op_common.cc:245] Compilation of 9cf032f8b5a8717f:0:0 with session name  took 50.862653405s and succeeded
I0000 00:00:1719422973.106009   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9cf032f8b5a8717f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15519511331156460602", property.function_library_fingerprint = 11583780170378345456, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 17:29:47.966062: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719422988.755682   19005 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f3a64ee504981653:0:0), session_name()
I0000 00:00:1719422995.602984   19005 tpu_compile_op_common.cc:245] Compilation of f3a64ee504981653:0:0 with session name  took 6.847240137s and succeeded
I0000 00:00:1719422995.654589   19005 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f3a64ee504981653:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3692653789219386056", property.function_library_fingerprint = 2185433521673908135, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/20


I0000 00:00:1719422996.401585   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(75418c81788d018:0:0), session_name()
I0000 00:00:1719423042.326242   18956 tpu_compile_op_common.cc:245] Compilation of 75418c81788d018:0:0 with session name  took 45.924603265s and succeeded
I0000 00:00:1719423042.433339   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75418c81788d018:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15519511331156460602", property.function_library_fingerprint = 11583780170378345456, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.8173791170120239 and accuracy: 0.8080357313156128


[I 2024-06-26 17:32:11,568] Trial 934 finished with value: 0.8080357313156128 and parameters: {'num_epochs': 20, 'dropout_rate': 0.2222575772958272, 'weight_decay': 0.028265617347260203, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7259013181338788}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 17:33:38.190301: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719423221.650317   19015 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4867bfc65cf6bb5c:0:0), session_name()
I0000 00:00:1719423271.851860   19015 tpu_compile_op_common.cc:245] Compilation of 4867bfc65cf6bb5c:0:0 with session name  took 50.201484054s and succeeded
I0000 00:00:1719423271.960751   19015 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4867bfc65cf6bb5c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7129877623913839169", property.function_library_fingerprint = 10050834528128413956, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:34:46.593055: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719423287.323105   18978 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5e7ca9980202dbf8:0:0), session_name()
I0000 00:00:1719423294.103232   18978 tpu_compile_op_common.cc:245] Compilation of 5e7ca9980202dbf8:0:0 with session name  took 6.780078768s and succeeded
I0000 00:00:1719423294.157409   18978 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e7ca9980202dbf8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4488356992647431285", property.function_library_fingerprint = 16756285510037564815, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719423294.957607   18943 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(46b433a24cd1ad50:0:0), session_name()
I0000 00:00:1719423342.496213   18943 tpu_compile_op_common.cc:245] Compilation of 46b433a24cd1ad50:0:0 with session name  took 47.538561615s and succeeded
I0000 00:00:1719423342.629769   18943 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(46b433a24cd1ad50:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7129877623913839169", property.function_library_fingerprint = 10050834528128413956, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7926082611083984 and accuracy: 0.8247767686843872


[I 2024-06-26 17:36:21,782] Trial 942 finished with value: 0.8247767686843872 and parameters: {'num_epochs': 9, 'dropout_rate': 0.12392503142009906, 'weight_decay': 0.020972077926720935, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.572526086090917}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 17:37:43.901653: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719423467.434266   18989 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6007d3924b3990c3:0:0), session_name()
I0000 00:00:1719423518.853675   18989 tpu_compile_op_common.cc:245] Compilation of 6007d3924b3990c3:0:0 with session name  took 51.419340815s and succeeded
I0000 00:00:1719423518.990595   18989 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6007d3924b3990c3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_763930543807477731", property.function_library_fingerprint = 3229973110022003929, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 17:38:54.201596: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719423535.006629   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4e071a82395586ce:0:0), session_name()
I0000 00:00:1719423541.805367   18985 tpu_compile_op_common.cc:245] Compilation of 4e071a82395586ce:0:0 with session name  took 6.798687154s and succeeded
I0000 00:00:1719423541.855810   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e071a82395586ce:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4056917688059871759", property.function_library_fingerprint = 14897597723473082546, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719423542.593621   18936 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c33f651ae2b8c352:0:0), session_name()
I0000 00:00:1719423591.309276   18936 tpu_compile_op_common.cc:245] Compilation of c33f651ae2b8c352:0:0 with session name  took 48.715607301s and succeeded
I0000 00:00:1719423591.420627   18936 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c33f651ae2b8c352:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_763930543807477731", property.function_library_fingerprint = 3229973110022003929, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7318550944328308 and accuracy: 0.7974330186843872


[I 2024-06-26 17:40:34,306] Trial 950 finished with value: 0.7974330186843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.18738471597596867, 'weight_decay': 0.013518819971603908, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7782804622083082}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 17:41:58.686534: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719423722.078196   18946 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7e0e11312282d44b:0:0), session_name()
I0000 00:00:1719423771.510597   18946 tpu_compile_op_common.cc:245] Compilation of 7e0e11312282d44b:0:0 with session name  took 49.432325955s and succeeded
I0000 00:00:1719423771.624569   18946 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7e0e11312282d44b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1814388936435369939", property.function_library_fingerprint = 16473286440693583162, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:43:06.427689: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719423787.126315   18964 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(298d013dfd0d0cdf:0:0), session_name()
I0000 00:00:1719423793.683394   18964 tpu_compile_op_common.cc:245] Compilation of 298d013dfd0d0cdf:0:0 with session name  took 6.557036089s and succeeded
I0000 00:00:1719423793.733987   18964 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(298d013dfd0d0cdf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3408171229088684134", property.function_library_fingerprint = 7974153634989416375, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719423794.522656   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cd422f379c093bcd:0:0), session_name()
I0000 00:00:1719423842.322243   18953 tpu_compile_op_common.cc:245] Compilation of cd422f379c093bcd:0:0 with session name  took 47.799526508s and succeeded
I0000 00:00:1719423842.430649   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cd422f379c093bcd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1814388936435369939", property.function_library_fingerprint = 16473286440693583162, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7680622339248657 and accuracy: 0.8359375


[I 2024-06-26 17:44:50,643] Trial 957 finished with value: 0.8359375 and parameters: {'num_epochs': 11, 'dropout_rate': 0.28634710543681635, 'weight_decay': 0.01723963687640691, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8038039771295291}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 17:46:12.678564: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719423976.404190   18978 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(19ce7272d4426d89:0:0), session_name()
I0000 00:00:1719424026.015055   18978 tpu_compile_op_common.cc:245] Compilation of 19ce7272d4426d89:0:0 with session name  took 49.610772499s and succeeded
I0000 00:00:1719424026.146248   18978 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(19ce7272d4426d89:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4548352889114404628", property.function_library_fingerprint = 14253970102296563542, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:47:20.697054: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719424041.486908   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bf86775a61d48a51:0:0), session_name()
I0000 00:00:1719424048.095912   18947 tpu_compile_op_common.cc:245] Compilation of bf86775a61d48a51:0:0 with session name  took 6.608897631s and succeeded
I0000 00:00:1719424048.140361   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf86775a61d48a51:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4275853377670968942", property.function_library_fingerprint = 6355136527667657900, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719424048.969670   18984 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3e934dcc26a6d490:0:0), session_name()
I0000 00:00:1719424096.129845   18984 tpu_compile_op_common.cc:245] Compilation of 3e934dcc26a6d490:0:0 with session name  took 47.160118259s and succeeded
I0000 00:00:1719424096.234403   18984 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e934dcc26a6d490:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4548352889114404628", property.function_library_fingerprint = 14253970102296563542, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7415890693664551 and accuracy: 0.8409598469734192


[I 2024-06-26 17:49:03,439] Trial 965 finished with value: 0.8409598469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2976752014758267, 'weight_decay': 0.050978592529740754, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4140685535363084}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 17:50:28.490972: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719424232.087872   18935 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(478393f75e96e655:0:0), session_name()
I0000 00:00:1719424282.695859   18935 tpu_compile_op_common.cc:245] Compilation of 478393f75e96e655:0:0 with session name  took 50.607929021s and succeeded
I0000 00:00:1719424282.807649   18935 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(478393f75e96e655:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10600968335833774855", property.function_library_fingerprint = 15421548863868879518, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 17:51:37.572406: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719424298.270020   18997 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(529801d3f3ac150f:0:0), session_name()
I0000 00:00:1719424305.202459   18997 tpu_compile_op_common.cc:245] Compilation of 529801d3f3ac150f:0:0 with session name  took 6.932391617s and succeeded
I0000 00:00:1719424305.255670   18997 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(529801d3f3ac150f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7160929566722877590", property.function_library_fingerprint = 5126359482546450708, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/8


I0000 00:00:1719424306.029521   18999 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b94bb8c3fd6ca6fc:0:0), session_name()
I0000 00:00:1719424353.398561   18999 tpu_compile_op_common.cc:245] Compilation of b94bb8c3fd6ca6fc:0:0 with session name  took 47.368988568s and succeeded
I0000 00:00:1719424353.529508   18999 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b94bb8c3fd6ca6fc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10600968335833774855", property.function_library_fingerprint = 15421548863868879518, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7587690949440002 and accuracy: 0.8219866156578064


[I 2024-06-26 17:53:08,433] Trial 973 finished with value: 0.8219866156578064 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2103693791288213, 'weight_decay': 0.014890887396493673, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7071692257339189}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 17:54:28.071271: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719424471.739589   18989 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b50a405cf0398e4:0:0), session_name()
I0000 00:00:1719424522.549625   18989 tpu_compile_op_common.cc:245] Compilation of b50a405cf0398e4:0:0 with session name  took 50.809962391s and succeeded
I0000 00:00:1719424522.687119   18989 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b50a405cf0398e4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16395765784862090647", property.function_library_fingerprint = 16921034681597985370, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 17:55:38.355630: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719424539.095226   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d429ad6ee6277d28:0:0), session_name()
I0000 00:00:1719424545.668097   19008 tpu_compile_op_common.cc:245] Compilation of d429ad6ee6277d28:0:0 with session name  took 6.572793799s and succeeded
I0000 00:00:1719424545.714196   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d429ad6ee6277d28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7719890549860657530", property.function_library_fingerprint = 10392073280573279030, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719424546.492381   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d909eb2b7ecf8ec2:0:0), session_name()
I0000 00:00:1719424593.247502   18940 tpu_compile_op_common.cc:245] Compilation of d909eb2b7ecf8ec2:0:0 with session name  took 46.755070443s and succeeded
I0000 00:00:1719424593.385178   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d909eb2b7ecf8ec2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16395765784862090647", property.function_library_fingerprint = 16921034681597985370, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7682682871818542 and accuracy: 0.8191964030265808


[I 2024-06-26 17:57:11,412] Trial 981 finished with value: 0.8191964030265808 and parameters: {'num_epochs': 9, 'dropout_rate': 0.1971540832604503, 'weight_decay': 0.016480304166979657, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4676514002482126}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 17:58:34.205107: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719424718.007111   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4166834860d64121:0:0), session_name()
I0000 00:00:1719424767.824986   18952 tpu_compile_op_common.cc:245] Compilation of 4166834860d64121:0:0 with session name  took 49.817831215s and succeeded
I0000 00:00:1719424767.957704   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4166834860d64121:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12295430368318525300", property.function_library_fingerprint = 7632590086036795337, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 17:59:42.688257: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719424783.444556   18942 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2a7bcda72f7c32ad:0:0), session_name()
I0000 00:00:1719424790.130471   18942 tpu_compile_op_common.cc:245] Compilation of 2a7bcda72f7c32ad:0:0 with session name  took 6.685869255s and succeeded
I0000 00:00:1719424790.175194   18942 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a7bcda72f7c32ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_748499957959309562", property.function_library_fingerprint = 7344055783308324967, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/10


I0000 00:00:1719424790.941596   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(243906dd04a8b7b1:0:0), session_name()
I0000 00:00:1719424837.344735   18952 tpu_compile_op_common.cc:245] Compilation of 243906dd04a8b7b1:0:0 with session name  took 46.403077222s and succeeded
I0000 00:00:1719424837.475079   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(243906dd04a8b7b1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12295430368318525300", property.function_library_fingerprint = 7632590086036795337, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7588416934013367 and accuracy: 0.8364955186843872


[I 2024-06-26 18:01:20,171] Trial 989 finished with value: 0.8364955186843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.3078770745958665, 'weight_decay': 0.04525783461880031, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.09505210075808974}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 18:02:45.357384: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719424968.839354   18939 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(64772cfda439ce06:0:0), session_name()
I0000 00:00:1719425018.623981   18939 tpu_compile_op_common.cc:245] Compilation of 64772cfda439ce06:0:0 with session name  took 49.784586718s and succeeded
I0000 00:00:1719425018.762744   18939 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(64772cfda439ce06:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8798024672540015307", property.function_library_fingerprint = 5987363030232959392, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 18:03:54.227739: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719425034.975073   18976 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d9ca36cf916e6ca7:0:0), session_name()
I0000 00:00:1719425041.696301   18976 tpu_compile_op_common.cc:245] Compilation of d9ca36cf916e6ca7:0:0 with session name  took 6.721163976s and succeeded
I0000 00:00:1719425041.753656   18976 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d9ca36cf916e6ca7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7323624663462837465", property.function_library_fingerprint = 12370144692633308551, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/12


I0000 00:00:1719425042.541568   18938 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(26bbc1618a784d15:0:0), session_name()
I0000 00:00:1719425089.180703   18938 tpu_compile_op_common.cc:245] Compilation of 26bbc1618a784d15:0:0 with session name  took 46.639097274s and succeeded
I0000 00:00:1719425089.310593   18938 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(26bbc1618a784d15:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8798024672540015307", property.function_library_fingerprint = 5987363030232959392, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.784844696521759 and accuracy: 0.8431919813156128


[I 2024-06-26 18:05:41,128] Trial 997 finished with value: 0.8431919813156128 and parameters: {'num_epochs': 12, 'dropout_rate': 0.21846452078585119, 'weight_decay': 0.01930816085900713, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8779923898805599}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 18:07:05.807564: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719425229.394810   18974 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fbab3e2ff9576669:0:0), session_name()
I0000 00:00:1719425281.610325   18974 tpu_compile_op_common.cc:245] Compilation of fbab3e2ff9576669:0:0 with session name  took 52.215452444s and succeeded
I0000 00:00:1719425281.722995   18974 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fbab3e2ff9576669:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16149809024488455532", property.function_library_fingerprint = 5123340647673214425, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 18:08:16.568292: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719425297.293579   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8ce91c9ce08eb0a5:0:0), session_name()
I0000 00:00:1719425304.023506   18985 tpu_compile_op_common.cc:245] Compilation of 8ce91c9ce08eb0a5:0:0 with session name  took 6.729861754s and succeeded
I0000 00:00:1719425304.069293   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ce91c9ce08eb0a5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_700620330868776727", property.function_library_fingerprint = 132552423991337607, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/11


I0000 00:00:1719425304.793465   19026 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48d05188bfd6494b:0:0), session_name()
I0000 00:00:1719425353.067787   19026 tpu_compile_op_common.cc:245] Compilation of 48d05188bfd6494b:0:0 with session name  took 48.274273853s and succeeded
I0000 00:00:1719425353.173269   19026 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48d05188bfd6494b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16149809024488455532", property.function_library_fingerprint = 5123340647673214425, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7175930142402649 and accuracy: 0.8175223469734192


[I 2024-06-26 18:10:01,584] Trial 1005 finished with value: 0.8175223469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.35363453501516784, 'weight_decay': 0.04158536259990561, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4961126085960609}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 18:11:24.053054: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719425487.643938   18942 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(56122eb4681e54c6:0:0), session_name()
I0000 00:00:1719425537.044435   18942 tpu_compile_op_common.cc:245] Compilation of 56122eb4681e54c6:0:0 with session name  took 49.400444709s and succeeded
I0000 00:00:1719425537.154129   18942 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(56122eb4681e54c6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4567033425511225677", property.function_library_fingerprint = 4264765344561677060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 18:12:31.685586: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719425552.424203   18933 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(87ad93a2b0b340ae:0:0), session_name()
I0000 00:00:1719425559.471520   18933 tpu_compile_op_common.cc:245] Compilation of 87ad93a2b0b340ae:0:0 with session name  took 7.047224076s and succeeded
I0000 00:00:1719425559.527787   18933 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(87ad93a2b0b340ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17333861131670204963", property.function_library_fingerprint = 7252107407544430974, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719425560.283336   18941 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(68bc7f505b62bb04:0:0), session_name()
I0000 00:00:1719425608.519667   18941 tpu_compile_op_common.cc:245] Compilation of 68bc7f505b62bb04:0:0 with session name  took 48.23629185s and succeeded
I0000 00:00:1719425608.651930   18941 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(68bc7f505b62bb04:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4567033425511225677", property.function_library_fingerprint = 4264765344561677060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7629622220993042 and accuracy: 0.8487723469734192


2024-06-26 18:14:26.297957: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719425667.048007   18951 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c41e6b88528a6fab:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719425673.522826   18951 tpu_compile_op_common.cc:245] Compilation of c41e6b88528a6fab:0:0 with session name  took 6.474766493s and succeeded
I0000 00:00:1719425673.559272   18951 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c41e6b88528a6fab:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_10478884618218255582", property.function_library_fingerprint = 5872000054435032632, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719425673.559329   18951 tpu_compilation_cache_interface.cc:541] After adding entry for key c41e6b88528a6fab



I0000 00:00:1719425674.743029   19017 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2882d5450812a827:0:0), session_name()




I0000 00:00:1719425680.461508   19017 tpu_compile_op_common.cc:245] Compilation of 2882d5450812a827:0:0 with session name  took 5.718431922s and succeeded
I0000 00:00:1719425680.492914   19017 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2882d5450812a827:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_10478884618218255582", property.function_library_fingerprint = 5872000054435032632, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719425680.492956   19017 tpu_compilation_cache_interface.cc:541] After adding entry for key 2882d5450812a827

Epoch 1/4
Epoch 2/4


I0000 00:00:1719425682.350034   18936 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(235d6aa08d7c7ea9:0:0), session_name()
I0000 00:00:1719425729.278048   18936 tpu_compile_op_common.cc:245] Compilation of 235d6aa08d7c7ea9:0:0 with session name  took 46.927951431s and succeeded
I0000 00:00:1719425729.381999   18936 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(235d6aa08d7c7ea9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4567033425511225677", property.function_library_fingerprint = 4264765344561677060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_724_accuracy_0.8454_avg_score_0.8032_f1_0.7609_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_724_accuracy_0.8454_avg_score_0.8032_f1_0.7609_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 18:15:35,320] Trial 1013 finished with value: 0.8487723469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.27840225380773453, 'weight_decay': 0.04494827235774869, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6918356020424586}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 18:16:56.739361: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719425820.108058   19016 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(27cbad7909a394e9:0:0), session_name()
I0000 00:00:1719425870.062012   19016 tpu_compile_op_common.cc:245] Compilation of 27cbad7909a394e9:0:0 with session name  took 49.953896377s and succeeded
I0000 00:00:1719425870.195511   19016 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(27cbad7909a394e9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17374995083780062698", property.function_library_fingerprint = 9162678213735651289, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 18:18:04.679849: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719425885.382446   18975 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f1e6a71b6138532b:0:0), session_name()
I0000 00:00:1719425891.976995   18975 tpu_compile_op_common.cc:245] Compilation of f1e6a71b6138532b:0:0 with session name  took 6.594514919s and succeeded
I0000 00:00:1719425892.015880   18975 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f1e6a71b6138532b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_11259240125424697697", property.function_library_fingerprint = 12147330661848545091, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/9


I0000 00:00:1719425892.758324   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9283c6f46168bc4:0:0), session_name()
I0000 00:00:1719425940.140415   18947 tpu_compile_op_common.cc:245] Compilation of 9283c6f46168bc4:0:0 with session name  took 47.382055164s and succeeded
I0000 00:00:1719425940.268110   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9283c6f46168bc4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17374995083780062698", property.function_library_fingerprint = 9162678213735651289, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 9/9
f1 score: 0.7678632140159607 and accuracy: 0.8359375


[I 2024-06-26 18:19:39,621] Trial 1024 finished with value: 0.8359375 and parameters: {'num_epochs': 9, 'dropout_rate': 0.197822110314367, 'weight_decay': 0.012628029179601232, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.21022042547808162}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 18:21:01.553221: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719426065.217599   19022 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(242812191450bca4:0:0), session_name()
I0000 00:00:1719426116.383358   19022 tpu_compile_op_common.cc:245] Compilation of 242812191450bca4:0:0 with session name  took 51.165698217s and succeeded
I0000 00:00:1719426116.518455   19022 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(242812191450bca4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7277532927225836423", property.function_library_fingerprint = 10129818012574192942, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 18:22:11.376653: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719426132.136354   18935 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(44f8536d1c0b37ce:0:0), session_name()
I0000 00:00:1719426138.915338   18935 tpu_compile_op_common.cc:245] Compilation of 44f8536d1c0b37ce:0:0 with session name  took 6.778929412s and succeeded
I0000 00:00:1719426138.964967   18935 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(44f8536d1c0b37ce:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17680007691192412567", property.function_library_fingerprint = 632157103934414604, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/8


I0000 00:00:1719426139.728227   19005 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b849f74ae7b6c626:0:0), session_name()
I0000 00:00:1719426189.465749   19005 tpu_compile_op_common.cc:245] Compilation of b849f74ae7b6c626:0:0 with session name  took 49.737433497s and succeeded
I0000 00:00:1719426189.600691   19005 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b849f74ae7b6c626:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7277532927225836423", property.function_library_fingerprint = 10129818012574192942, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7413029670715332 and accuracy: 0.7779017686843872


[I 2024-06-26 18:23:44,298] Trial 1032 finished with value: 0.7779017686843872 and parameters: {'num_epochs': 8, 'dropout_rate': 0.28529744916816613, 'weight_decay': 0.06444065568619514, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8272793979339247}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 18:25:06.033236: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719426309.884197   18944 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6062c29356e7cc70:0:0), session_name()
I0000 00:00:1719426358.945072   18944 tpu_compile_op_common.cc:245] Compilation of 6062c29356e7cc70:0:0 with session name  took 49.060807805s and succeeded
I0000 00:00:1719426359.051855   18944 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6062c29356e7cc70:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11026270177809390848", property.function_library_fingerprint = 17233643795545675565, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 18:26:14.198514: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719426374.958073   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7cf997e312b27399:0:0), session_name()
I0000 00:00:1719426381.590104   18971 tpu_compile_op_common.cc:245] Compilation of 7cf997e312b27399:0:0 with session name  took 6.631981043s and succeeded
I0000 00:00:1719426381.633442   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7cf997e312b27399:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10420491230462753692", property.function_library_fingerprint = 6417739628347364462, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719426382.356994   19000 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(815b1556a4d96a22:0:0), session_name()
I0000 00:00:1719426428.848790   19000 tpu_compile_op_common.cc:245] Compilation of 815b1556a4d96a22:0:0 with session name  took 46.491741621s and succeeded
I0000 00:00:1719426428.961091   19000 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(815b1556a4d96a22:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11026270177809390848", property.function_library_fingerprint = 17233643795545675565, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7880354523658752 and accuracy: 0.8448660969734192


[I 2024-06-26 18:27:51,761] Trial 1040 finished with value: 0.8448660969734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.16963258613405466, 'weight_decay': 0.021654498728544087, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5555357332036148}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 18:29:15.728085: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719426559.492813   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1ef80ebdb07c6732:0:0), session_name()
I0000 00:00:1719426608.346506   19001 tpu_compile_op_common.cc:245] Compilation of 1ef80ebdb07c6732:0:0 with session name  took 48.853644707s and succeeded
I0000 00:00:1719426608.453405   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ef80ebdb07c6732:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14012603208994831187", property.function_library_fingerprint = 16659591917779982212, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 18:30:23.274801: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719426624.003424   18983 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(20c01d48e94f053e:0:0), session_name()
I0000 00:00:1719426630.725655   18983 tpu_compile_op_common.cc:245] Compilation of 20c01d48e94f053e:0:0 with session name  took 6.722173444s and succeeded
I0000 00:00:1719426630.770070   18983 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(20c01d48e94f053e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4208820719171240652", property.function_library_fingerprint = 11431637599616123066, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719426631.516601   19016 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f51aac1669d116d7:0:0), session_name()
I0000 00:00:1719426678.479759   19016 tpu_compile_op_common.cc:245] Compilation of f51aac1669d116d7:0:0 with session name  took 46.96311117s and succeeded
I0000 00:00:1719426678.616481   19016 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f51aac1669d116d7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14012603208994831187", property.function_library_fingerprint = 16659591917779982212, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7779589891433716 and accuracy: 0.8459821343421936


[I 2024-06-26 18:32:07,363] Trial 1047 finished with value: 0.8459821343421936 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2215357757055886, 'weight_decay': 0.025251461995749826, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.15762401283110267}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 18:33:31.194670: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719426815.132531   18992 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5e8cfc84c33d3f9:0:0), session_name()
I0000 00:00:1719426864.415639   18992 tpu_compile_op_common.cc:245] Compilation of 5e8cfc84c33d3f9:0:0 with session name  took 49.283049229s and succeeded
I0000 00:00:1719426864.545112   18992 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e8cfc84c33d3f9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17076152395238122774", property.function_library_fingerprint = 4990648517014549129, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, to



2024-06-26 18:34:39.620972: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719426880.367193   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6cb690c455974d53:0:0), session_name()
I0000 00:00:1719426887.244734   18931 tpu_compile_op_common.cc:245] Compilation of 6cb690c455974d53:0:0 with session name  took 6.8774906s and succeeded
I0000 00:00:1719426887.288699   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6cb690c455974d53:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10824202088694657035", property.function_library_fingerprint = 6607180217940973684, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/10


I0000 00:00:1719426888.086942   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6eb1ffdc7178c8c1:0:0), session_name()
I0000 00:00:1719426936.056829   19011 tpu_compile_op_common.cc:245] Compilation of 6eb1ffdc7178c8c1:0:0 with session name  took 47.969835209s and succeeded
I0000 00:00:1719426936.183544   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6eb1ffdc7178c8c1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17076152395238122774", property.function_library_fingerprint = 4990648517014549129, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7762168645858765 and accuracy: 0.8409598469734192


[I 2024-06-26 18:36:20,001] Trial 1055 finished with value: 0.8409598469734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2301020581747788, 'weight_decay': 0.05447871363897605, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6307300726789588}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 18:37:41.977766: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719427065.895980   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d6276285e84ed8c0:0:0), session_name()
I0000 00:00:1719427116.514453   18940 tpu_compile_op_common.cc:245] Compilation of d6276285e84ed8c0:0:0 with session name  took 50.6184s and succeeded
I0000 00:00:1719427116.622861   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d6276285e84ed8c0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13179986101733476751", property.function_library_fingerprint = 7680378846496982201, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



2024-06-26 18:38:51.162964: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719427131.844280   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dd518236de0c71b2:0:0), session_name()
I0000 00:00:1719427138.791992   18952 tpu_compile_op_common.cc:245] Compilation of dd518236de0c71b2:0:0 with session name  took 6.947659304s and succeeded
I0000 00:00:1719427138.846603   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd518236de0c71b2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12997729822111167504", property.function_library_fingerprint = 15054088974295558108, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719427139.593040   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(154f0dcfd1bc6314:0:0), session_name()
I0000 00:00:1719427186.235988   19021 tpu_compile_op_common.cc:245] Compilation of 154f0dcfd1bc6314:0:0 with session name  took 46.642890954s and succeeded
I0000 00:00:1719427186.368504   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(154f0dcfd1bc6314:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13179986101733476751", property.function_library_fingerprint = 7680378846496982201, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7587835192680359 and accuracy: 0.8482142686843872


2024-06-26 18:40:38.109092: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719427238.827877   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cd325046c40b41f8:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719427245.280183   18956 tpu_compile_op_common.cc:245] Compilation of cd325046c40b41f8:0:0 with session name  took 6.452247424s and succeeded
I0000 00:00:1719427245.326060   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cd325046c40b41f8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13942204025987508925", property.function_library_fingerprint = 8214727848515055161, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719427245.326121   18956 tpu_compilation_cache_interface.cc:541] After adding entry for key cd325046c40b41f8



I0000 00:00:1719427246.599361   18942 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cf5e2dd352b01821:0:0), session_name()




I0000 00:00:1719427252.414105   18942 tpu_compile_op_common.cc:245] Compilation of cf5e2dd352b01821:0:0 with session name  took 5.814699671s and succeeded
I0000 00:00:1719427252.456410   18942 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cf5e2dd352b01821:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13942204025987508925", property.function_library_fingerprint = 8214727848515055161, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719427252.456469   18942 tpu_compilation_cache_interface.cc:541] After adding entry for key cf5e2dd352b01821

Epoch 1/3
Epoch 2/3


I0000 00:00:1719427254.330755   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d031de3900ad4de5:0:0), session_name()
I0000 00:00:1719427303.424451   18931 tpu_compile_op_common.cc:245] Compilation of d031de3900ad4de5:0:0 with session name  took 49.093631996s and succeeded
I0000 00:00:1719427303.568657   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d031de3900ad4de5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13179986101733476751", property.function_library_fingerprint = 7680378846496982201, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_799_accuracy_0.8471_avg_score_0.8150_f1_0.7830_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_799_accuracy_0.8471_avg_score_0.8150_f1_0.7830_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 18:41:48,539] Trial 1063 finished with value: 0.8482142686843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2372315388058045, 'weight_decay': 0.05265729425942164, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.6591633932705947}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 18:43:15.540970: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719427399.180857   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e2c8cbaa929e018d:0:0), session_name()
I0000 00:00:1719427449.025011   18987 tpu_compile_op_common.cc:245] Compilation of e2c8cbaa929e018d:0:0 with session name  took 49.844091332s and succeeded
I0000 00:00:1719427449.157091   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e2c8cbaa929e018d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15998271356194784050", property.function_library_fingerprint = 13186765513021772745, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 18:44:24.725069: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719427465.457566   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fcf70a9abb481b78:0:0), session_name()
I0000 00:00:1719427472.384386   18987 tpu_compile_op_common.cc:245] Compilation of fcf70a9abb481b78:0:0 with session name  took 6.926760652s and succeeded
I0000 00:00:1719427472.426555   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcf70a9abb481b78:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12330464470531165343", property.function_library_fingerprint = 16934675762957601165, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/11


I0000 00:00:1719427473.251567   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(34f2f182d6cb91bc:0:0), session_name()
I0000 00:00:1719427520.133512   18971 tpu_compile_op_common.cc:245] Compilation of 34f2f182d6cb91bc:0:0 with session name  took 46.881887802s and succeeded
I0000 00:00:1719427520.239809   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(34f2f182d6cb91bc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15998271356194784050", property.function_library_fingerprint = 13186765513021772745, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7762807011604309 and accuracy: 0.8415178656578064


[I 2024-06-26 18:46:07,569] Trial 1072 finished with value: 0.8415178656578064 and parameters: {'num_epochs': 11, 'dropout_rate': 0.20974705737932606, 'weight_decay': 0.018279492040128992, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6681509670343292}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 18:47:28.503052: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719427652.020687   18989 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6a2c951c0ebb45a1:0:0), session_name()
I0000 00:00:1719427701.677385   18989 tpu_compile_op_common.cc:245] Compilation of 6a2c951c0ebb45a1:0:0 with session name  took 49.656623298s and succeeded
I0000 00:00:1719427701.807714   18989 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6a2c951c0ebb45a1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7304084182749137237", property.function_library_fingerprint = 2121902776550759023, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 18:48:36.066493: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719427716.766892   18988 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8bb58b0fb3fb8271:0:0), session_name()
I0000 00:00:1719427723.458966   18988 tpu_compile_op_common.cc:245] Compilation of 8bb58b0fb3fb8271:0:0 with session name  took 6.692021294s and succeeded
I0000 00:00:1719427723.501791   18988 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8bb58b0fb3fb8271:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3983198712457414109", property.function_library_fingerprint = 10113910884852809086, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719427724.295847   18988 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(515c33a9af4fe83c:0:0), session_name()
I0000 00:00:1719427771.359928   18988 tpu_compile_op_common.cc:245] Compilation of 515c33a9af4fe83c:0:0 with session name  took 47.064043039s and succeeded
I0000 00:00:1719427771.486116   18988 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(515c33a9af4fe83c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7304084182749137237", property.function_library_fingerprint = 2121902776550759023, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7209614515304565 and accuracy: 0.8409598469734192


[I 2024-06-26 18:50:11,070] Trial 1080 finished with value: 0.8409598469734192 and parameters: {'num_epochs': 9, 'dropout_rate': 0.31234896758330044, 'weight_decay': 0.01493805078166837, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.692812909314291}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 18:51:31.900807: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719427895.621696   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cfb050091cd8f178:0:0), session_name()
I0000 00:00:1719427946.298372   18953 tpu_compile_op_common.cc:245] Compilation of cfb050091cd8f178:0:0 with session name  took 50.676604155s and succeeded
I0000 00:00:1719427946.431417   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cfb050091cd8f178:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13943405136098435424", property.function_library_fingerprint = 6329533389779885498, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 18:52:40.846749: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719427961.544175   19023 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c3a60af76299830:0:0), session_name()
I0000 00:00:1719427968.363956   19023 tpu_compile_op_common.cc:245] Compilation of c3a60af76299830:0:0 with session name  took 6.819735452s and succeeded
I0000 00:00:1719427968.413091   19023 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3a60af76299830:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_9321805766516791480", property.function_library_fingerprint = 10010340742098491724, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/12


I0000 00:00:1719427969.206203   18976 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8dde2429d46365d3:0:0), session_name()
I0000 00:00:1719428017.910573   18976 tpu_compile_op_common.cc:245] Compilation of 8dde2429d46365d3:0:0 with session name  took 48.704299085s and succeeded
I0000 00:00:1719428018.040016   18976 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8dde2429d46365d3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_13943405136098435424", property.function_library_fingerprint = 6329533389779885498, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7479857802391052 and accuracy: 0.8376116156578064


[I 2024-06-26 18:54:30,702] Trial 1088 finished with value: 0.8376116156578064 and parameters: {'num_epochs': 12, 'dropout_rate': 0.36413369418030256, 'weight_decay': 0.023219764724535984, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7052902201115074}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 18:55:53.788823: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719428157.476930   18941 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(18c1dc28e0c1f7ed:0:0), session_name()
I0000 00:00:1719428208.330970   18941 tpu_compile_op_common.cc:245] Compilation of 18c1dc28e0c1f7ed:0:0 with session name  took 50.853985411s and succeeded
I0000 00:00:1719428208.467513   18941 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(18c1dc28e0c1f7ed:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2455686422032653703", property.function_library_fingerprint = 5998859633169840923, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 18:57:03.231814: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719428223.935882   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d6696c38140b70e3:0:0), session_name()
I0000 00:00:1719428231.042917   18931 tpu_compile_op_common.cc:245] Compilation of d6696c38140b70e3:0:0 with session name  took 7.106970543s and succeeded
I0000 00:00:1719428231.089797   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d6696c38140b70e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3321525115821123820", property.function_library_fingerprint = 9497579701096540279, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/10


I0000 00:00:1719428231.856134   18997 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(36e9d247de0ab638:0:0), session_name()
I0000 00:00:1719428277.897060   18997 tpu_compile_op_common.cc:245] Compilation of 36e9d247de0ab638:0:0 with session name  took 46.040880351s and succeeded
I0000 00:00:1719428278.000332   18997 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(36e9d247de0ab638:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2455686422032653703", property.function_library_fingerprint = 5998859633169840923, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7754150032997131 and accuracy: 0.84765625


[I 2024-06-26 18:58:40,698] Trial 1096 finished with value: 0.84765625 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2228704849840542, 'weight_decay': 0.027324143169187683, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.6049517052494446}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


2024-06-26 19:00:03.831335: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719428407.191868   18943 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6990f6eeb83e32dd:0:0), session_name()
I0000 00:00:1719428455.429781   18943 tpu_compile_op_common.cc:245] Compilation of 6990f6eeb83e32dd:0:0 with session name  took 48.237854333s and succeeded
I0000 00:00:1719428455.536480   18943 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6990f6eeb83e32dd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3938421415070492102", property.function_library_fingerprint = 7300889990779418841, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 19:01:10.344825: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719428471.071734   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8b19d2d4d27a0e80:0:0), session_name()
I0000 00:00:1719428477.625693   18956 tpu_compile_op_common.cc:245] Compilation of 8b19d2d4d27a0e80:0:0 with session name  took 6.553890785s and succeeded
I0000 00:00:1719428477.666200   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b19d2d4d27a0e80:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_1075765299201965559", property.function_library_fingerprint = 8248926531886572060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/13


I0000 00:00:1719428478.435241   19024 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(79440ad0233a4e79:0:0), session_name()
I0000 00:00:1719428525.863495   19024 tpu_compile_op_common.cc:245] Compilation of 79440ad0233a4e79:0:0 with session name  took 47.428183543s and succeeded
I0000 00:00:1719428526.008026   19024 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(79440ad0233a4e79:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3938421415070492102", property.function_library_fingerprint = 7300889990779418841, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7847558259963989 and accuracy: 0.8264508843421936


[I 2024-06-26 19:03:03,570] Trial 1104 finished with value: 0.8264508843421936 and parameters: {'num_epochs': 13, 'dropout_rate': 0.20359828468741858, 'weight_decay': 0.020496718582698418, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6439968065724161}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 19:04:25.707596: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719428669.454181   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8527bf2d0d970758:0:0), session_name()
I0000 00:00:1719428719.221454   18952 tpu_compile_op_common.cc:245] Compilation of 8527bf2d0d970758:0:0 with session name  took 49.767171278s and succeeded
I0000 00:00:1719428719.325872   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8527bf2d0d970758:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7682047782473633342", property.function_library_fingerprint = 13498889435822935308, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:05:33.972407: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719428734.735408   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4475bb1698c8004a:0:0), session_name()
I0000 00:00:1719428741.546729   18940 tpu_compile_op_common.cc:245] Compilation of 4475bb1698c8004a:0:0 with session name  took 6.811240471s and succeeded
I0000 00:00:1719428741.584935   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4475bb1698c8004a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17914364637111337171", property.function_library_fingerprint = 13977232395600343656, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/11


I0000 00:00:1719428742.347348   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec9a2cfe8fbbc94b:0:0), session_name()
I0000 00:00:1719428788.959095   19011 tpu_compile_op_common.cc:245] Compilation of ec9a2cfe8fbbc94b:0:0 with session name  took 46.611689471s and succeeded
I0000 00:00:1719428789.070566   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec9a2cfe8fbbc94b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7682047782473633342", property.function_library_fingerprint = 13498889435822935308, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7496395111083984 and accuracy: 0.8392857313156128


[I 2024-06-26 19:07:16,546] Trial 1110 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.30574704819101056, 'weight_decay': 0.016289371494387913, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6473552168762593}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:08:40.512567: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719428924.008825   18970 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f7ee92d76759678:0:0), session_name()
I0000 00:00:1719428972.808681   18970 tpu_compile_op_common.cc:245] Compilation of f7ee92d76759678:0:0 with session name  took 48.799788626s and succeeded
I0000 00:00:1719428972.915207   18970 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f7ee92d76759678:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11491171722789718321", property.function_library_fingerprint = 14302297940831841923, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 19:09:48.746686: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719428989.485002   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cbe09bffd8ffa217:0:0), session_name()
I0000 00:00:1719428996.276526   19011 tpu_compile_op_common.cc:245] Compilation of cbe09bffd8ffa217:0:0 with session name  took 6.791461661s and succeeded
I0000 00:00:1719428996.323803   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cbe09bffd8ffa217:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17434814770000387366", property.function_library_fingerprint = 17446298581961604690, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719428997.124544   18962 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(320e4d9659fe744b:0:0), session_name()
I0000 00:00:1719429043.465627   18962 tpu_compile_op_common.cc:245] Compilation of 320e4d9659fe744b:0:0 with session name  took 46.341015305s and succeeded
I0000 00:00:1719429043.594424   18962 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(320e4d9659fe744b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11491171722789718321", property.function_library_fingerprint = 14302297940831841923, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7561531066894531 and accuracy: 0.8381696343421936


[I 2024-06-26 19:11:26,593] Trial 1116 finished with value: 0.8381696343421936 and parameters: {'num_epochs': 10, 'dropout_rate': 0.29013320301917617, 'weight_decay': 0.0481343682815021, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7383196921952792}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 19:12:48.361641: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719429171.978982   18963 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e2879ef3ea6f1142:0:0), session_name()
I0000 00:00:1719429220.861818   18963 tpu_compile_op_common.cc:245] Compilation of e2879ef3ea6f1142:0:0 with session name  took 48.882783858s and succeeded
I0000 00:00:1719429220.972019   18963 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e2879ef3ea6f1142:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18203438419977560091", property.function_library_fingerprint = 8548283155201900960, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:13:55.420351: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719429236.189506   18935 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9f1061e589421553:0:0), session_name()
I0000 00:00:1719429243.349458   18935 tpu_compile_op_common.cc:245] Compilation of 9f1061e589421553:0:0 with session name  took 7.159893598s and succeeded
I0000 00:00:1719429243.400420   18935 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9f1061e589421553:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3388237935427366220", property.function_library_fingerprint = 10072838182305893074, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719429244.140749   18997 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c2cd4ddce15d610f:0:0), session_name()
I0000 00:00:1719429293.267802   18997 tpu_compile_op_common.cc:245] Compilation of c2cd4ddce15d610f:0:0 with session name  took 49.127002747s and succeeded
I0000 00:00:1719429293.399398   18997 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c2cd4ddce15d610f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18203438419977560091", property.function_library_fingerprint = 8548283155201900960, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7834711670875549 and accuracy: 0.8286830186843872


[I 2024-06-26 19:15:40,654] Trial 1124 finished with value: 0.8286830186843872 and parameters: {'num_epochs': 11, 'dropout_rate': 0.23869247077393752, 'weight_decay': 0.009215699192369479, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.08340255208232374}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:17:08.158871: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719429431.674670   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d7bed03a02c16ae7:0:0), session_name()
I0000 00:00:1719429482.365427   19012 tpu_compile_op_common.cc:245] Compilation of d7bed03a02c16ae7:0:0 with session name  took 50.690689906s and succeeded
I0000 00:00:1719429482.477883   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d7bed03a02c16ae7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9535646829370231347", property.function_library_fingerprint = 10051639551808124287, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:18:17.825198: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719429498.545515   18982 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8ee018684ef7169a:0:0), session_name()
I0000 00:00:1719429505.687525   18982 tpu_compile_op_common.cc:245] Compilation of 8ee018684ef7169a:0:0 with session name  took 7.141967555s and succeeded
I0000 00:00:1719429505.730260   18982 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ee018684ef7169a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7863042984239157296", property.function_library_fingerprint = 8105049901752799794, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/10


I0000 00:00:1719429506.500209   18966 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(248d39fa3ab25e63:0:0), session_name()
I0000 00:00:1719429554.142503   18966 tpu_compile_op_common.cc:245] Compilation of 248d39fa3ab25e63:0:0 with session name  took 47.642218746s and succeeded
I0000 00:00:1719429554.273124   18966 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(248d39fa3ab25e63:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9535646829370231347", property.function_library_fingerprint = 10051639551808124287, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7570384740829468 and accuracy: 0.8309151530265808


[I 2024-06-26 19:19:57,259] Trial 1132 finished with value: 0.8309151530265808 and parameters: {'num_epochs': 10, 'dropout_rate': 0.24507694088423249, 'weight_decay': 0.012248636766014809, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.10173190941327848}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:21:21.153937: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719429684.950131   19003 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dea435c3f069f6d8:0:0), session_name()
I0000 00:00:1719429735.403605   19003 tpu_compile_op_common.cc:245] Compilation of dea435c3f069f6d8:0:0 with session name  took 50.453420036s and succeeded
I0000 00:00:1719429735.514260   19003 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dea435c3f069f6d8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4930738364150840184", property.function_library_fingerprint = 4269083035224858880, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 19:22:30.126010: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719429750.900382   19023 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8ae6fe868b983d9d:0:0), session_name()
I0000 00:00:1719429757.837263   19023 tpu_compile_op_common.cc:245] Compilation of 8ae6fe868b983d9d:0:0 with session name  took 6.936816273s and succeeded
I0000 00:00:1719429757.882076   19023 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ae6fe868b983d9d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_16698973546136624178", property.function_library_fingerprint = 9783273083318468608, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719429758.679503   19003 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(908361ab67930fdd:0:0), session_name()
I0000 00:00:1719429806.412405   19003 tpu_compile_op_common.cc:245] Compilation of 908361ab67930fdd:0:0 with session name  took 47.732843494s and succeeded
I0000 00:00:1719429806.519923   19003 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(908361ab67930fdd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4930738364150840184", property.function_library_fingerprint = 4269083035224858880, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7612587809562683 and accuracy: 0.8420758843421936


[I 2024-06-26 19:24:09,371] Trial 1139 finished with value: 0.8420758843421936 and parameters: {'num_epochs': 10, 'dropout_rate': 0.25002726388177166, 'weight_decay': 0.010682036228914874, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.11010286010881175}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 19:25:37.816686: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719429941.390512   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9dadd91c36f86696:0:0), session_name()
I0000 00:00:1719429992.349995   18971 tpu_compile_op_common.cc:245] Compilation of 9dadd91c36f86696:0:0 with session name  took 50.959426169s and succeeded
I0000 00:00:1719429992.468300   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9dadd91c36f86696:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2568008695481017842", property.function_library_fingerprint = 263203353516300454, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 19:26:47.838858: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719430008.528578   18979 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(470d78d00a042f9a:0:0), session_name()
I0000 00:00:1719430015.487057   18979 tpu_compile_op_common.cc:245] Compilation of 470d78d00a042f9a:0:0 with session name  took 6.958440557s and succeeded
I0000 00:00:1719430015.531257   18979 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(470d78d00a042f9a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8653064646059456553", property.function_library_fingerprint = 15070793976688186137, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719430016.285369   18938 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(36b141b3289d3927:0:0), session_name()
I0000 00:00:1719430065.547576   18938 tpu_compile_op_common.cc:245] Compilation of 36b141b3289d3927:0:0 with session name  took 49.26215815s and succeeded
I0000 00:00:1719430065.660991   18938 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(36b141b3289d3927:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2568008695481017842", property.function_library_fingerprint = 263203353516300454, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7516655921936035 and accuracy: 0.8431919813156128


[I 2024-06-26 19:28:33,135] Trial 1147 finished with value: 0.8431919813156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.23316403124655394, 'weight_decay': 0.00620740996684226, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.10472656081289011}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 19:29:55.747991: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719430199.259817   19019 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aaa33aafe4281707:0:0), session_name()
I0000 00:00:1719430249.905109   19019 tpu_compile_op_common.cc:245] Compilation of aaa33aafe4281707:0:0 with session name  took 50.64523337s and succeeded
I0000 00:00:1719430250.029799   19019 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aaa33aafe4281707:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8648790586118207509", property.function_library_fingerprint = 17497884244858245586, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 19:31:04.114393: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719430264.818653   18936 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b69fc10f7e2e389c:0:0), session_name()
I0000 00:00:1719430271.845047   18936 tpu_compile_op_common.cc:245] Compilation of b69fc10f7e2e389c:0:0 with session name  took 7.026319195s and succeeded
I0000 00:00:1719430271.888459   18936 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b69fc10f7e2e389c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6530473551724753962", property.function_library_fingerprint = 1968780133592541408, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/9


I0000 00:00:1719430272.648298   18961 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4dc5f9a9b00cea1f:0:0), session_name()
I0000 00:00:1719430320.894743   18961 tpu_compile_op_common.cc:245] Compilation of 4dc5f9a9b00cea1f:0:0 with session name  took 48.246377086s and succeeded
I0000 00:00:1719430321.013860   18961 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4dc5f9a9b00cea1f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8648790586118207509", property.function_library_fingerprint = 17497884244858245586, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7491037249565125 and accuracy: 0.83984375


[I 2024-06-26 19:32:39,737] Trial 1155 finished with value: 0.83984375 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22605417620106907, 'weight_decay': 0.012693877003470118, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.1361422632297381}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:34:06.200917: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719430449.653677   19020 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6352458453142140:0:0), session_name()
I0000 00:00:1719430502.119391   19020 tpu_compile_op_common.cc:245] Compilation of 6352458453142140:0:0 with session name  took 52.465663705s and succeeded
I0000 00:00:1719430502.222933   19020 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6352458453142140:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10347940504719904475", property.function_library_fingerprint = 9923411656720839021, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:35:17.823390: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719430518.534776   18961 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1c541d191bedb5f0:0:0), session_name()
I0000 00:00:1719430525.648314   18961 tpu_compile_op_common.cc:245] Compilation of 1c541d191bedb5f0:0:0 with session name  took 7.113484139s and succeeded
I0000 00:00:1719430525.699511   18961 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1c541d191bedb5f0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17365860492563174830", property.function_library_fingerprint = 10220384127378985304, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719430526.483897   18951 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b1a5c632fb4aad8c:0:0), session_name()
I0000 00:00:1719430577.070928   18951 tpu_compile_op_common.cc:245] Compilation of b1a5c632fb4aad8c:0:0 with session name  took 50.586979213s and succeeded
I0000 00:00:1719430577.178570   18951 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b1a5c632fb4aad8c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10347940504719904475", property.function_library_fingerprint = 9923411656720839021, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7613545656204224 and accuracy: 0.82421875


[I 2024-06-26 19:37:00,487] Trial 1163 finished with value: 0.82421875 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2635813714440116, 'weight_decay': 0.014093415931631968, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9197856241888267}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:38:23.994359: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719430707.480650   18934 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(99c67d037397bdeb:0:0), session_name()
I0000 00:00:1719430757.976193   18934 tpu_compile_op_common.cc:245] Compilation of 99c67d037397bdeb:0:0 with session name  took 50.495497621s and succeeded
I0000 00:00:1719430758.077310   18934 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(99c67d037397bdeb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3846220107256545448", property.function_library_fingerprint = 4690763090483223485, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 19:39:32.624779: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719430773.401063   18984 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c232bf5565847e8c:0:0), session_name()
I0000 00:00:1719430780.384726   18984 tpu_compile_op_common.cc:245] Compilation of c232bf5565847e8c:0:0 with session name  took 6.983587844s and succeeded
I0000 00:00:1719430780.430208   18984 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c232bf5565847e8c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6265924774825604909", property.function_library_fingerprint = 13609565409085547708, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719430781.220928   18992 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8b0bf6b5c68401e9:0:0), session_name()
I0000 00:00:1719430828.888004   18992 tpu_compile_op_common.cc:245] Compilation of 8b0bf6b5c68401e9:0:0 with session name  took 47.667012371s and succeeded
I0000 00:00:1719430829.014016   18992 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b0bf6b5c68401e9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3846220107256545448", property.function_library_fingerprint = 4690763090483223485, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7733097076416016 and accuracy: 0.8459821343421936


[I 2024-06-26 19:41:12,926] Trial 1171 finished with value: 0.8459821343421936 and parameters: {'num_epochs': 10, 'dropout_rate': 0.21770800239145421, 'weight_decay': 0.015605360145735828, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7540067718672617}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 19:42:36.757745: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719430960.556394   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fcc91a71a4775476:0:0), session_name()
I0000 00:00:1719431012.721877   18959 tpu_compile_op_common.cc:245] Compilation of fcc91a71a4775476:0:0 with session name  took 52.165422746s and succeeded
I0000 00:00:1719431012.834323   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcc91a71a4775476:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6483009416874662452", property.function_library_fingerprint = 13764351862544153533, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:43:47.744776: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719431028.457488   18977 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(793a55463080a30e:0:0), session_name()
I0000 00:00:1719431035.437758   18977 tpu_compile_op_common.cc:245] Compilation of 793a55463080a30e:0:0 with session name  took 6.98021623s and succeeded
I0000 00:00:1719431035.486123   18977 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(793a55463080a30e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8232815275719711539", property.function_library_fingerprint = 11620665205220276025, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/9


I0000 00:00:1719431036.272551   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4baec4ffa14bd90c:0:0), session_name()
I0000 00:00:1719431084.174323   18947 tpu_compile_op_common.cc:245] Compilation of 4baec4ffa14bd90c:0:0 with session name  took 47.901725322s and succeeded
I0000 00:00:1719431084.274755   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4baec4ffa14bd90c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6483009416874662452", property.function_library_fingerprint = 13764351862544153533, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7548493146896362 and accuracy: 0.8431919813156128


[I 2024-06-26 19:45:23,530] Trial 1179 finished with value: 0.8431919813156128 and parameters: {'num_epochs': 9, 'dropout_rate': 0.21229847562641493, 'weight_decay': 0.03134498076859137, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6967076918842313}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 19:46:44.635478: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719431208.165937   18951 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bb531e6afc9f3744:0:0), session_name()
I0000 00:00:1719431258.925203   18951 tpu_compile_op_common.cc:245] Compilation of bb531e6afc9f3744:0:0 with session name  took 50.759199188s and succeeded
I0000 00:00:1719431259.031390   18951 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bb531e6afc9f3744:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16012860877974850383", property.function_library_fingerprint = 4706180702520683608, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:47:54.662430: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719431275.413495   18961 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(99520b6d831446e2:0:0), session_name()
I0000 00:00:1719431282.276347   18961 tpu_compile_op_common.cc:245] Compilation of 99520b6d831446e2:0:0 with session name  took 6.862787648s and succeeded
I0000 00:00:1719431282.326524   18961 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(99520b6d831446e2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10590103319001663820", property.function_library_fingerprint = 5179314742089896073, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719431283.109757   19007 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dc9db1aad011c91e:0:0), session_name()
I0000 00:00:1719431330.224827   19007 tpu_compile_op_common.cc:245] Compilation of dc9db1aad011c91e:0:0 with session name  took 47.115014063s and succeeded
I0000 00:00:1719431330.328851   19007 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dc9db1aad011c91e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16012860877974850383", property.function_library_fingerprint = 4706180702520683608, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7750688791275024 and accuracy: 0.8415178656578064


[I 2024-06-26 19:49:33,579] Trial 1187 finished with value: 0.8415178656578064 and parameters: {'num_epochs': 10, 'dropout_rate': 0.19757287529936465, 'weight_decay': 0.016152415295595274, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7129548463205866}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 19:50:57.519906: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719431461.211207   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c0e77fe7fd6490d2:0:0), session_name()
I0000 00:00:1719431512.968531   19001 tpu_compile_op_common.cc:245] Compilation of c0e77fe7fd6490d2:0:0 with session name  took 51.757260687s and succeeded
I0000 00:00:1719431513.072618   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c0e77fe7fd6490d2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6495711974931349712", property.function_library_fingerprint = 11773305539634859132, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 19:52:07.801582: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719431528.581137   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48cd802a2cfabb26:0:0), session_name()
I0000 00:00:1719431535.493158   18967 tpu_compile_op_common.cc:245] Compilation of 48cd802a2cfabb26:0:0 with session name  took 6.911970418s and succeeded
I0000 00:00:1719431535.545150   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48cd802a2cfabb26:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17323861515348280763", property.function_library_fingerprint = 15007844857277390785, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/11


I0000 00:00:1719431536.362659   18977 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(29beb24df5985bbf:0:0), session_name()
I0000 00:00:1719431583.913565   18977 tpu_compile_op_common.cc:245] Compilation of 29beb24df5985bbf:0:0 with session name  took 47.550867208s and succeeded
I0000 00:00:1719431584.014687   18977 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29beb24df5985bbf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_6495711974931349712", property.function_library_fingerprint = 11773305539634859132, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7721515893936157 and accuracy: 0.8353794813156128


[I 2024-06-26 19:53:52,956] Trial 1195 finished with value: 0.8353794813156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.22366561910460622, 'weight_decay': 0.0185364935423463, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7270078701224006}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 19:55:16.642247: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719431720.422092   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4233bbec6e9830f:0:0), session_name()
I0000 00:00:1719431769.675005   18987 tpu_compile_op_common.cc:245] Compilation of 4233bbec6e9830f:0:0 with session name  took 49.252840887s and succeeded
I0000 00:00:1719431769.805800   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4233bbec6e9830f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_540477331914608224", property.function_library_fingerprint = 275290182205336828, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



2024-06-26 19:56:24.481812: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719431785.267370   18972 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(728a6b3ebaaf346e:0:0), session_name()
I0000 00:00:1719431792.260595   18972 tpu_compile_op_common.cc:245] Compilation of 728a6b3ebaaf346e:0:0 with session name  took 6.993171795s and succeeded
I0000 00:00:1719431792.301877   18972 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(728a6b3ebaaf346e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5044760499138181324", property.function_library_fingerprint = 1147272749771653523, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719431793.078031   19019 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2809276904af9792:0:0), session_name()
I0000 00:00:1719431840.344511   19019 tpu_compile_op_common.cc:245] Compilation of 2809276904af9792:0:0 with session name  took 47.26641534s and succeeded
I0000 00:00:1719431840.477669   19019 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2809276904af9792:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_540477331914608224", property.function_library_fingerprint = 275290182205336828, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7670601010322571 and accuracy: 0.8353794813156128


[I 2024-06-26 19:58:09,305] Trial 1203 finished with value: 0.8353794813156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.23100339281917628, 'weight_decay': 0.01156095192212859, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7324120543834419}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/18


2024-06-26 19:59:33.691278: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719431977.343428   19014 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e932c9f1e12923b2:0:0), session_name()
I0000 00:00:1719432025.438803   19014 tpu_compile_op_common.cc:245] Compilation of e932c9f1e12923b2:0:0 with session name  took 48.09531771s and succeeded
I0000 00:00:1719432025.542236   19014 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e932c9f1e12923b2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15542619008925760805", property.function_library_fingerprint = 3017827087962089849, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 20:00:40.697804: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719432041.419734   19018 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cdd818e4e91658d:0:0), session_name()
I0000 00:00:1719432048.187642   19018 tpu_compile_op_common.cc:245] Compilation of cdd818e4e91658d:0:0 with session name  took 6.767852431s and succeeded
I0000 00:00:1719432048.230591   19018 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cdd818e4e91658d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_14770720999739894747", property.function_library_fingerprint = 18082821157485098661, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/18


I0000 00:00:1719432049.010895   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4dc94578888329ba:0:0), session_name()
I0000 00:00:1719432099.218449   19012 tpu_compile_op_common.cc:245] Compilation of 4dc94578888329ba:0:0 with session name  took 50.207475168s and succeeded
I0000 00:00:1719432099.322232   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4dc94578888329ba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15542619008925760805", property.function_library_fingerprint = 3017827087962089849, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
f1 score: 0.8152876496315002 and accuracy: 0.8225446343421936


[I 2024-06-26 20:02:58,825] Trial 1211 finished with value: 0.8225446343421936 and parameters: {'num_epochs': 18, 'dropout_rate': 0.20169850226276206, 'weight_decay': 0.02179110180115939, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.7740950232148123}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 20:04:24.793691: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719432268.728099   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(43c4e1c452913a02:0:0), session_name()
I0000 00:00:1719432318.337173   18995 tpu_compile_op_common.cc:245] Compilation of 43c4e1c452913a02:0:0 with session name  took 49.609008478s and succeeded
I0000 00:00:1719432318.474299   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(43c4e1c452913a02:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11127919831154468619", property.function_library_fingerprint = 6492350303108908352, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:05:33.506963: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719432334.284963   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2bc7e7c70311c8ea:0:0), session_name()
I0000 00:00:1719432341.216991   18995 tpu_compile_op_common.cc:245] Compilation of 2bc7e7c70311c8ea:0:0 with session name  took 6.931931232s and succeeded
I0000 00:00:1719432341.256218   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2bc7e7c70311c8ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_13010410777555669381", property.function_library_fingerprint = 3417101549029820854, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719432342.013528   18942 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3239183ffaee19d9:0:0), session_name()
I0000 00:00:1719432388.537209   18942 tpu_compile_op_common.cc:245] Compilation of 3239183ffaee19d9:0:0 with session name  took 46.523617034s and succeeded
I0000 00:00:1719432388.668879   18942 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3239183ffaee19d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11127919831154468619", property.function_library_fingerprint = 6492350303108908352, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7778874635696411 and accuracy: 0.8420758843421936


[I 2024-06-26 20:07:08,858] Trial 1219 finished with value: 0.8420758843421936 and parameters: {'num_epochs': 9, 'dropout_rate': 0.1543340464795556, 'weight_decay': 0.023765695817474338, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7057559720696389}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:08:35.078411: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719432518.774209   19010 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(afff05a7859c4f32:0:0), session_name()
I0000 00:00:1719432568.640834   19010 tpu_compile_op_common.cc:245] Compilation of afff05a7859c4f32:0:0 with session name  took 49.866564159s and succeeded
I0000 00:00:1719432568.746641   19010 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(afff05a7859c4f32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18062910409170693579", property.function_library_fingerprint = 11786056036481884905, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 20:09:44.591730: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719432585.349230   18968 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fa6ab7923583eb4c:0:0), session_name()
I0000 00:00:1719432592.238613   18968 tpu_compile_op_common.cc:245] Compilation of fa6ab7923583eb4c:0:0 with session name  took 6.889306837s and succeeded
I0000 00:00:1719432592.293283   18968 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fa6ab7923583eb4c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15065168744389816847", property.function_library_fingerprint = 9726510862048544675, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719432593.074626   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(78284a1cd38d1a1c:0:0), session_name()
I0000 00:00:1719432641.917742   18952 tpu_compile_op_common.cc:245] Compilation of 78284a1cd38d1a1c:0:0 with session name  took 48.84302412s and succeeded
I0000 00:00:1719432642.044592   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(78284a1cd38d1a1c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18062910409170693579", property.function_library_fingerprint = 11786056036481884905, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7846771478652954 and accuracy: 0.8470982313156128


[I 2024-06-26 20:11:24,942] Trial 1227 finished with value: 0.8470982313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.1832558621878943, 'weight_decay': 0.01622214386393115, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4782468754346458}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:12:51.600483: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719432775.311117   18988 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2e00da230fdd6dd4:0:0), session_name()
I0000 00:00:1719432824.444515   18988 tpu_compile_op_common.cc:245] Compilation of 2e00da230fdd6dd4:0:0 with session name  took 49.133335806s and succeeded
I0000 00:00:1719432824.549659   18988 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2e00da230fdd6dd4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16458303380474867275", property.function_library_fingerprint = 2763580279252204326, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:13:59.502321: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719432840.235552   18981 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5463a7a03f4d82ba:0:0), session_name()
I0000 00:00:1719432847.095081   18981 tpu_compile_op_common.cc:245] Compilation of 5463a7a03f4d82ba:0:0 with session name  took 6.859459297s and succeeded
I0000 00:00:1719432847.145716   18981 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5463a7a03f4d82ba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12685131514155482029", property.function_library_fingerprint = 13989380561020284652, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719432847.934754   18979 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b23c00c98a720fbb:0:0), session_name()
I0000 00:00:1719432894.879899   18979 tpu_compile_op_common.cc:245] Compilation of b23c00c98a720fbb:0:0 with session name  took 46.945095015s and succeeded
I0000 00:00:1719432895.004973   18979 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b23c00c98a720fbb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16458303380474867275", property.function_library_fingerprint = 2763580279252204326, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7672564387321472 and accuracy: 0.8325892686843872


[I 2024-06-26 20:15:38,461] Trial 1235 finished with value: 0.8325892686843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2559868733446794, 'weight_decay': 0.07067037682129995, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7387193477885994}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:17:08.559684: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719433032.090791   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c0420a41d9767f7a:0:0), session_name()
I0000 00:00:1719433082.900057   19011 tpu_compile_op_common.cc:245] Compilation of c0420a41d9767f7a:0:0 with session name  took 50.809191182s and succeeded
I0000 00:00:1719433083.038363   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c0420a41d9767f7a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15346519291585687575", property.function_library_fingerprint = 2258527747342906348, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:18:18.543109: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719433099.231757   18949 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(784ec3831f314292:0:0), session_name()
I0000 00:00:1719433106.073207   18949 tpu_compile_op_common.cc:245] Compilation of 784ec3831f314292:0:0 with session name  took 6.841386671s and succeeded
I0000 00:00:1719433106.114048   18949 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(784ec3831f314292:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_16820978401466348061", property.function_library_fingerprint = 10335324140377827326, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719433106.861501   18944 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8a2a8c2621c1dfc6:0:0), session_name()
I0000 00:00:1719433155.898816   18944 tpu_compile_op_common.cc:245] Compilation of 8a2a8c2621c1dfc6:0:0 with session name  took 49.03726199s and succeeded
I0000 00:00:1719433156.001293   18944 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8a2a8c2621c1dfc6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15346519291585687575", property.function_library_fingerprint = 2258527747342906348, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7661779522895813 and accuracy: 0.8454241156578064


[I 2024-06-26 20:19:59,324] Trial 1243 finished with value: 0.8454241156578064 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2642505426973661, 'weight_decay': 0.07370878137915392, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7220584182386109}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 20:21:24.370960: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719433288.054157   18975 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4b038875014c26ad:0:0), session_name()
I0000 00:00:1719433336.921320   18975 tpu_compile_op_common.cc:245] Compilation of 4b038875014c26ad:0:0 with session name  took 48.867119163s and succeeded
I0000 00:00:1719433337.029622   18975 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4b038875014c26ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12311216190676155849", property.function_library_fingerprint = 4796391961479798890, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:22:31.761954: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719433352.472269   19007 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(28d1a6f7e953ea52:0:0), session_name()
I0000 00:00:1719433359.256608   19007 tpu_compile_op_common.cc:245] Compilation of 28d1a6f7e953ea52:0:0 with session name  took 6.784280543s and succeeded
I0000 00:00:1719433359.294892   19007 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(28d1a6f7e953ea52:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_137755163973013659", property.function_library_fingerprint = 10127477829163067723, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719433360.027109   18986 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(abec9ebe3ee5f331:0:0), session_name()
I0000 00:00:1719433406.973089   18986 tpu_compile_op_common.cc:245] Compilation of abec9ebe3ee5f331:0:0 with session name  took 46.945932452s and succeeded
I0000 00:00:1719433407.074662   18986 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(abec9ebe3ee5f331:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12311216190676155849", property.function_library_fingerprint = 4796391961479798890, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7275940775871277 and accuracy: 0.8018973469734192


[I 2024-06-26 20:24:15,431] Trial 1251 finished with value: 0.8018973469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2173816924574684, 'weight_decay': 0.07201882469182125, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6887262444728772}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:25:38.117019: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719433542.113991   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8ce44019788fffe9:0:0), session_name()
I0000 00:00:1719433592.567306   18967 tpu_compile_op_common.cc:245] Compilation of 8ce44019788fffe9:0:0 with session name  took 50.453257268s and succeeded
I0000 00:00:1719433592.694781   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ce44019788fffe9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2627507205699918885", property.function_library_fingerprint = 3323832230975100181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 20:26:47.820900: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719433608.584958   18934 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1ea0291055c693d3:0:0), session_name()
I0000 00:00:1719433615.380525   18934 tpu_compile_op_common.cc:245] Compilation of 1ea0291055c693d3:0:0 with session name  took 6.795511151s and succeeded
I0000 00:00:1719433615.428340   18934 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ea0291055c693d3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_2870477494822442430", property.function_library_fingerprint = 13801100853941052493, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719433616.239474   18981 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7c0e964a575136f1:0:0), session_name()
I0000 00:00:1719433661.955088   18981 tpu_compile_op_common.cc:245] Compilation of 7c0e964a575136f1:0:0 with session name  took 45.71555553s and succeeded
I0000 00:00:1719433662.059134   18981 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7c0e964a575136f1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2627507205699918885", property.function_library_fingerprint = 3323832230975100181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7542962431907654 and accuracy: 0.8158482313156128


[I 2024-06-26 20:28:26,609] Trial 1258 finished with value: 0.8158482313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.22850427278867577, 'weight_decay': 0.04928081317504983, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.858456883568925}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 20:29:50.652526: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719433794.469887   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bf1bf6f0fa57d322:0:0), session_name()
I0000 00:00:1719433846.026818   18967 tpu_compile_op_common.cc:245] Compilation of bf1bf6f0fa57d322:0:0 with session name  took 51.556865004s and succeeded
I0000 00:00:1719433846.131931   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf1bf6f0fa57d322:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16232954083406927819", property.function_library_fingerprint = 16103746683661756999, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 20:31:01.827569: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719433862.547964   18950 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b167665d5fd8c36b:0:0), session_name()
I0000 00:00:1719433869.256003   18950 tpu_compile_op_common.cc:245] Compilation of b167665d5fd8c36b:0:0 with session name  took 6.707970308s and succeeded
I0000 00:00:1719433869.295476   18950 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b167665d5fd8c36b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_14604351247501458331", property.function_library_fingerprint = 9630466406942508103, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719433870.072309   18937 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8c01065cb29c978:0:0), session_name()
I0000 00:00:1719433917.690940   18937 tpu_compile_op_common.cc:245] Compilation of 8c01065cb29c978:0:0 with session name  took 47.618580781s and succeeded
I0000 00:00:1719433917.792183   18937 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8c01065cb29c978:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16232954083406927819", property.function_library_fingerprint = 16103746683661756999, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7349890470504761 and accuracy: 0.8297991156578064


[I 2024-06-26 20:32:45,341] Trial 1266 finished with value: 0.8297991156578064 and parameters: {'num_epochs': 11, 'dropout_rate': 0.20796730098962485, 'weight_decay': 0.009443831497791473, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6809656296227611}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 20:34:11.154359: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719434054.854766   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(64d262c8cd6494bd:0:0), session_name()
I0000 00:00:1719434105.019755   18995 tpu_compile_op_common.cc:245] Compilation of 64d262c8cd6494bd:0:0 with session name  took 50.164895356s and succeeded
I0000 00:00:1719434105.124240   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(64d262c8cd6494bd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14908954026539433328", property.function_library_fingerprint = 10314597504706929091, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 20:35:19.893784: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719434120.653562   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b6310f60c42c31fb:0:0), session_name()
I0000 00:00:1719434127.409977   18985 tpu_compile_op_common.cc:245] Compilation of b6310f60c42c31fb:0:0 with session name  took 6.756363277s and succeeded
I0000 00:00:1719434127.451224   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b6310f60c42c31fb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8860311170877528870", property.function_library_fingerprint = 7085852886208730065, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719434128.232371   18959 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7be0adc086bb7640:0:0), session_name()
I0000 00:00:1719434175.355333   18959 tpu_compile_op_common.cc:245] Compilation of 7be0adc086bb7640:0:0 with session name  took 47.122900144s and succeeded
I0000 00:00:1719434175.456779   18959 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7be0adc086bb7640:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14908954026539433328", property.function_library_fingerprint = 10314597504706929091, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7891630530357361 and accuracy: 0.8409598469734192


[I 2024-06-26 20:37:03,939] Trial 1274 finished with value: 0.8409598469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.19889017206914883, 'weight_decay': 0.007492677662430812, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.71239049605038}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:38:29.986671: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719434314.107512   19007 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6218acab72a8cdef:0:0), session_name()
I0000 00:00:1719434364.561619   19007 tpu_compile_op_common.cc:245] Compilation of 6218acab72a8cdef:0:0 with session name  took 50.45404956s and succeeded
I0000 00:00:1719434364.670577   19007 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6218acab72a8cdef:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7292182228807051341", property.function_library_fingerprint = 9990569293639616559, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, t



2024-06-26 20:39:39.949823: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719434380.777493   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bbc77093c35b8a31:0:0), session_name()
I0000 00:00:1719434387.714107   19021 tpu_compile_op_common.cc:245] Compilation of bbc77093c35b8a31:0:0 with session name  took 6.936517174s and succeeded
I0000 00:00:1719434387.769998   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bbc77093c35b8a31:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6699256857411345491", property.function_library_fingerprint = 811122289550645868, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/10


I0000 00:00:1719434388.525080   19020 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(997cd05ffebb3fc9:0:0), session_name()
I0000 00:00:1719434435.980704   19020 tpu_compile_op_common.cc:245] Compilation of 997cd05ffebb3fc9:0:0 with session name  took 47.455569857s and succeeded
I0000 00:00:1719434436.084272   19020 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(997cd05ffebb3fc9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7292182228807051341", property.function_library_fingerprint = 9990569293639616559, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7693477272987366 and accuracy: 0.8515625


2024-06-26 20:41:29.023931: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719434489.778217   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(21d0c17beef5bddb:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719434496.171060   19011 tpu_compile_op_common.cc:245] Compilation of 21d0c17beef5bddb:0:0 with session name  took 6.392737102s and succeeded
I0000 00:00:1719434496.216610   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(21d0c17beef5bddb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_7586375071344500841", property.function_library_fingerprint = 193559601857381127, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719434496.216688   19011 tpu_compilation_cache_interface.cc:541] After adding entry for key 21d0c17beef5bddb:0



I0000 00:00:1719434497.453685   18932 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2221822691f7b483:0:0), session_name()




I0000 00:00:1719434503.437879   18932 tpu_compile_op_common.cc:245] Compilation of 2221822691f7b483:0:0 with session name  took 5.984145548s and succeeded
I0000 00:00:1719434503.474779   18932 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2221822691f7b483:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_7586375071344500841", property.function_library_fingerprint = 193559601857381127, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719434503.474834   18932 tpu_compilation_cache_interface.cc:541] After adding entry for key 2221822691f7b483:0

Epoch 1/3
Epoch 2/3


I0000 00:00:1719434505.376429   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e0d643b4e2029997:0:0), session_name()
I0000 00:00:1719434554.080840   18931 tpu_compile_op_common.cc:245] Compilation of e0d643b4e2029997:0:0 with session name  took 48.70425387s and succeeded
I0000 00:00:1719434554.190325   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e0d643b4e2029997:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7292182228807051341", property.function_library_fingerprint = 9990569293639616559, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_1063_accuracy_0.8482_avg_score_0.8035_f1_0.7588_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_1063_accuracy_0.8482_avg_score_0.8035_f1_0.7588_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 20:42:39,152] Trial 1282 finished with value: 0.8515625 and parameters: {'num_epochs': 10, 'dropout_rate': 0.24129528431133937, 'weight_decay': 0.012834529196262068, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7043095857542317}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 20:44:04.544187: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719434648.096140   19026 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(75a9938282d99652:0:0), session_name()
I0000 00:00:1719434697.840444   19026 tpu_compile_op_common.cc:245] Compilation of 75a9938282d99652:0:0 with session name  took 49.744238261s and succeeded
I0000 00:00:1719434697.962063   19026 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75a9938282d99652:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5095560357262197016", property.function_library_fingerprint = 10202853679395704550, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:45:12.520831: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719434713.270580   18970 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c59ce6aa2b5831ee:0:0), session_name()
I0000 00:00:1719434720.241679   18970 tpu_compile_op_common.cc:245] Compilation of c59ce6aa2b5831ee:0:0 with session name  took 6.97104121s and succeeded
I0000 00:00:1719434720.275301   18970 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c59ce6aa2b5831ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_601817162587116948", property.function_library_fingerprint = 416045478905277795, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wra

Epoch 2/9


I0000 00:00:1719434721.017999   18948 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b3f99c46e648fe2a:0:0), session_name()
I0000 00:00:1719434770.315599   18948 tpu_compile_op_common.cc:245] Compilation of b3f99c46e648fe2a:0:0 with session name  took 49.297558428s and succeeded
I0000 00:00:1719434770.425018   18948 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b3f99c46e648fe2a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5095560357262197016", property.function_library_fingerprint = 10202853679395704550, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7629146575927734 and accuracy: 0.84375


[I 2024-06-26 20:46:49,079] Trial 1292 finished with value: 0.84375 and parameters: {'num_epochs': 9, 'dropout_rate': 0.23150324400761269, 'weight_decay': 0.013775938043539134, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7448102294464066}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 20:48:20.418415: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719434903.989389   18992 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5feca609a1fec26e:0:0), session_name()
I0000 00:00:1719434956.237485   18992 tpu_compile_op_common.cc:245] Compilation of 5feca609a1fec26e:0:0 with session name  took 52.248035332s and succeeded
I0000 00:00:1719434956.364565   18992 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5feca609a1fec26e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14553186464344709080", property.function_library_fingerprint = 2362029416585996942, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:49:31.237316: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719434971.934169   18974 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(19942a54219e469c:0:0), session_name()
I0000 00:00:1719434979.101042   18974 tpu_compile_op_common.cc:245] Compilation of 19942a54219e469c:0:0 with session name  took 7.166814328s and succeeded
I0000 00:00:1719434979.139456   18974 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(19942a54219e469c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10567577153823243419", property.function_library_fingerprint = 6893117999603425934, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719434980.738357   18961 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(38ccb83c1770eb81:0:0), session_name()
I0000 00:00:1719435029.619175   18961 tpu_compile_op_common.cc:245] Compilation of 38ccb83c1770eb81:0:0 with session name  took 48.880742549s and succeeded
I0000 00:00:1719435029.749162   18961 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(38ccb83c1770eb81:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_14553186464344709080", property.function_library_fingerprint = 2362029416585996942, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7559042572975159 and accuracy: 0.8314732313156128


[I 2024-06-26 20:51:08,630] Trial 1300 finished with value: 0.8314732313156128 and parameters: {'num_epochs': 9, 'dropout_rate': 0.254469422407587, 'weight_decay': 0.014657991187677613, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7837238518720904}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 20:52:32.747736: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719435156.345213   19003 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(289c56241e72a672:0:0), session_name()
I0000 00:00:1719435208.144193   19003 tpu_compile_op_common.cc:245] Compilation of 289c56241e72a672:0:0 with session name  took 51.798902078s and succeeded
I0000 00:00:1719435208.275777   19003 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(289c56241e72a672:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10899078049666646892", property.function_library_fingerprint = 15825467389171163722, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 20:53:42.689557: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719435223.396332   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a81ae0d381ef026a:0:0), session_name()
I0000 00:00:1719435230.461112   19001 tpu_compile_op_common.cc:245] Compilation of a81ae0d381ef026a:0:0 with session name  took 7.064683172s and succeeded
I0000 00:00:1719435230.505757   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a81ae0d381ef026a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_179948714766728708", property.function_library_fingerprint = 6805385926650484659, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/10


I0000 00:00:1719435231.277738   18985 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ceeea74280a391ab:0:0), session_name()
I0000 00:00:1719435280.103788   18985 tpu_compile_op_common.cc:245] Compilation of ceeea74280a391ab:0:0 with session name  took 48.825986968s and succeeded
I0000 00:00:1719435280.211581   18985 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ceeea74280a391ab:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10899078049666646892", property.function_library_fingerprint = 15825467389171163722, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7383305430412292 and accuracy: 0.8236607313156128


[I 2024-06-26 20:55:23,348] Trial 1308 finished with value: 0.8236607313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.23727950971044484, 'weight_decay': 0.01654959137726433, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6967268241840666}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 20:56:49.517541: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719435413.050447   18948 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ffdcfdcab5dc8076:0:0), session_name()
I0000 00:00:1719435463.560637   18948 tpu_compile_op_common.cc:245] Compilation of ffdcfdcab5dc8076:0:0 with session name  took 50.51014099s and succeeded
I0000 00:00:1719435463.687273   18948 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ffdcfdcab5dc8076:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16163461204250822866", property.function_library_fingerprint = 10782640817183270548, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 20:57:58.436761: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719435479.159925   18972 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9ecaac11d4690aa2:0:0), session_name()
I0000 00:00:1719435486.251778   18972 tpu_compile_op_common.cc:245] Compilation of 9ecaac11d4690aa2:0:0 with session name  took 7.091785976s and succeeded
I0000 00:00:1719435486.296167   18972 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ecaac11d4690aa2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_7557293752665952946", property.function_library_fingerprint = 18003610038525528876, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/8


I0000 00:00:1719435487.074736   18989 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f74e0e213dc63df5:0:0), session_name()
I0000 00:00:1719435537.163883   18989 tpu_compile_op_common.cc:245] Compilation of f74e0e213dc63df5:0:0 with session name  took 50.089073878s and succeeded
I0000 00:00:1719435537.271832   18989 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f74e0e213dc63df5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_16163461204250822866", property.function_library_fingerprint = 10782640817183270548, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7241443991661072 and accuracy: 0.8270089030265808


[I 2024-06-26 20:59:32,267] Trial 1315 finished with value: 0.8270089030265808 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2359123124346583, 'weight_decay': 0.017447606385206166, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7666117712284329}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 21:00:55.467058: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719435659.148190   18997 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(af9f3fd145e2eaca:0:0), session_name()
I0000 00:00:1719435713.006783   18997 tpu_compile_op_common.cc:245] Compilation of af9f3fd145e2eaca:0:0 with session name  took 53.858532943s and succeeded
I0000 00:00:1719435713.143887   18997 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af9f3fd145e2eaca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10707322381950990597", property.function_library_fingerprint = 9913609132880071471, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 21:02:07.969855: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719435728.707507   18962 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(12a2807432fded28:0:0), session_name()
I0000 00:00:1719435735.725739   18962 tpu_compile_op_common.cc:245] Compilation of 12a2807432fded28:0:0 with session name  took 7.018177298s and succeeded
I0000 00:00:1719435735.764912   18962 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(12a2807432fded28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4136893577580432441", property.function_library_fingerprint = 5632455655173070687, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/8


I0000 00:00:1719435736.571870   19025 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aa5c2ab86516c24b:0:0), session_name()
I0000 00:00:1719435783.635388   19025 tpu_compile_op_common.cc:245] Compilation of aa5c2ab86516c24b:0:0 with session name  took 47.063474523s and succeeded
I0000 00:00:1719435783.750131   19025 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aa5c2ab86516c24b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_10707322381950990597", property.function_library_fingerprint = 9913609132880071471, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.749182403087616 and accuracy: 0.84765625


[I 2024-06-26 21:03:38,935] Trial 1323 finished with value: 0.84765625 and parameters: {'num_epochs': 8, 'dropout_rate': 0.24763199757852586, 'weight_decay': 0.012434804310959774, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7271296919804136}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 21:05:02.546148: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719435907.006487   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(880c75a7a5b976f5:0:0), session_name()
I0000 00:00:1719435957.860051   18940 tpu_compile_op_common.cc:245] Compilation of 880c75a7a5b976f5:0:0 with session name  took 50.853474042s and succeeded
I0000 00:00:1719435957.976073   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(880c75a7a5b976f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4838905353731697605", property.function_library_fingerprint = 3367676694346080268, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:06:12.471880: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719435973.173501   18975 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b2a58ba2313dd5c4:0:0), session_name()
I0000 00:00:1719435980.178494   18975 tpu_compile_op_common.cc:245] Compilation of b2a58ba2313dd5c4:0:0 with session name  took 7.004923213s and succeeded
I0000 00:00:1719435980.234481   18975 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b2a58ba2313dd5c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6973886700352079360", property.function_library_fingerprint = 12437694064578732857, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/9


I0000 00:00:1719435980.981789   18945 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1263c33ffb8881f5:0:0), session_name()
I0000 00:00:1719436029.506115   18945 tpu_compile_op_common.cc:245] Compilation of 1263c33ffb8881f5:0:0 with session name  took 48.524284337s and succeeded
I0000 00:00:1719436029.617733   18945 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1263c33ffb8881f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4838905353731697605", property.function_library_fingerprint = 3367676694346080268, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7725316286087036 and accuracy: 0.8426339030265808


[I 2024-06-26 21:07:48,463] Trial 1331 finished with value: 0.8426339030265808 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22939308106191678, 'weight_decay': 0.04355600790428966, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8081013719370987}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:09:15.021818: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719436158.473606   19025 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e47fde78bfb90d9a:0:0), session_name()
I0000 00:00:1719436210.065306   19025 tpu_compile_op_common.cc:245] Compilation of e47fde78bfb90d9a:0:0 with session name  took 51.591637364s and succeeded
I0000 00:00:1719436210.177214   19025 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e47fde78bfb90d9a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9903490028621109010", property.function_library_fingerprint = 9220877401592529144, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:10:24.761374: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719436225.485304   18945 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(45899f14b770625d:0:0), session_name()
I0000 00:00:1719436232.608774   18945 tpu_compile_op_common.cc:245] Compilation of 45899f14b770625d:0:0 with session name  took 7.123397496s and succeeded
I0000 00:00:1719436232.657061   18945 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(45899f14b770625d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_17204870926530359441", property.function_library_fingerprint = 4606426834208589822, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719436233.433056   18980 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8cc20e2406774c24:0:0), session_name()
I0000 00:00:1719436282.581992   18980 tpu_compile_op_common.cc:245] Compilation of 8cc20e2406774c24:0:0 with session name  took 49.148879344s and succeeded
I0000 00:00:1719436282.711751   18980 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8cc20e2406774c24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_9903490028621109010", property.function_library_fingerprint = 9220877401592529144, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7628393173217773 and accuracy: 0.8409598469734192


[I 2024-06-26 21:12:06,638] Trial 1339 finished with value: 0.8409598469734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2683237591684441, 'weight_decay': 0.01692708673773967, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6885234054881598}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:13:31.607552: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719436415.615063   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(be3d0a5ce1995ee2:0:0), session_name()
I0000 00:00:1719436467.193018   18947 tpu_compile_op_common.cc:245] Compilation of be3d0a5ce1995ee2:0:0 with session name  took 51.577888748s and succeeded
I0000 00:00:1719436467.324537   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(be3d0a5ce1995ee2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1766640818896048281", property.function_library_fingerprint = 16750998287494346562, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 21:14:42.009515: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719436482.748592   19026 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(761035c880dff6ce:0:0), session_name()
I0000 00:00:1719436489.569061   19026 tpu_compile_op_common.cc:245] Compilation of 761035c880dff6ce:0:0 with session name  took 6.820407177s and succeeded
I0000 00:00:1719436489.613386   19026 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(761035c880dff6ce:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12789960438339745052", property.function_library_fingerprint = 12975891409022233698, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719436490.393642   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(71ad7d55ef45c348:0:0), session_name()
I0000 00:00:1719436537.741381   18956 tpu_compile_op_common.cc:245] Compilation of 71ad7d55ef45c348:0:0 with session name  took 47.347675019s and succeeded
I0000 00:00:1719436537.867417   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(71ad7d55ef45c348:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1766640818896048281", property.function_library_fingerprint = 16750998287494346562, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7799693942070007 and accuracy: 0.8275669813156128


[I 2024-06-26 21:16:21,858] Trial 1347 finished with value: 0.8275669813156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.16240162934606758, 'weight_decay': 0.03213677965017883, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.7064425985581229}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 21:17:45.796608: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719436669.454170   18977 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fc8c252267b7df4f:0:0), session_name()
I0000 00:00:1719436720.007678   18977 tpu_compile_op_common.cc:245] Compilation of fc8c252267b7df4f:0:0 with session name  took 50.553448664s and succeeded
I0000 00:00:1719436720.135334   18977 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fc8c252267b7df4f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2000520559605007433", property.function_library_fingerprint = 16919974227179750608, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 21:18:55.701079: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719436736.471152   19016 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8c451e3c76a9f0f0:0:0), session_name()
I0000 00:00:1719436743.438456   19016 tpu_compile_op_common.cc:245] Compilation of 8c451e3c76a9f0f0:0:0 with session name  took 6.967240632s and succeeded
I0000 00:00:1719436743.480426   19016 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8c451e3c76a9f0f0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5279197099423011818", property.function_library_fingerprint = 13147853900432951975, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719436744.241628   19001 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a656e9c03d68e3cf:0:0), session_name()
I0000 00:00:1719436793.065600   19001 tpu_compile_op_common.cc:245] Compilation of a656e9c03d68e3cf:0:0 with session name  took 48.823906854s and succeeded
I0000 00:00:1719436793.167460   19001 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a656e9c03d68e3cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2000520559605007433", property.function_library_fingerprint = 16919974227179750608, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7704271078109741 and accuracy: 0.84765625


[I 2024-06-26 21:20:40,200] Trial 1356 finished with value: 0.84765625 and parameters: {'num_epochs': 11, 'dropout_rate': 0.23568777304520558, 'weight_decay': 0.018865261601147498, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7339167143156823}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 21:22:06.261525: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719436929.980889   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6cd4f819223ba127:0:0), session_name()
I0000 00:00:1719436980.533009   18931 tpu_compile_op_common.cc:245] Compilation of 6cd4f819223ba127:0:0 with session name  took 50.552052409s and succeeded
I0000 00:00:1719436980.644985   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6cd4f819223ba127:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5245054487087810442", property.function_library_fingerprint = 8125355936696896848, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:23:15.150225: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719436995.879409   18975 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4f4eb91ee0ce00b6:0:0), session_name()
I0000 00:00:1719437002.576531   18975 tpu_compile_op_common.cc:245] Compilation of 4f4eb91ee0ce00b6:0:0 with session name  took 6.697056623s and succeeded
I0000 00:00:1719437002.624343   18975 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4f4eb91ee0ce00b6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_8347036572702697339", property.function_library_fingerprint = 1032079968163954316, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/9


I0000 00:00:1719437003.415907   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b04644a46ddb0d14:0:0), session_name()
I0000 00:00:1719437052.445750   18987 tpu_compile_op_common.cc:245] Compilation of b04644a46ddb0d14:0:0 with session name  took 49.029760734s and succeeded
I0000 00:00:1719437052.554355   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b04644a46ddb0d14:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5245054487087810442", property.function_library_fingerprint = 8125355936696896848, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7335920333862305 and accuracy: 0.8264508843421936


[I 2024-06-26 21:24:52,278] Trial 1364 finished with value: 0.8264508843421936 and parameters: {'num_epochs': 9, 'dropout_rate': 0.26153043499550904, 'weight_decay': 0.006452355186192518, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.6791843581460459}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:26:18.139905: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719437182.024082   18965 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bccd2d18720c011f:0:0), session_name()
I0000 00:00:1719437231.262063   18965 tpu_compile_op_common.cc:245] Compilation of bccd2d18720c011f:0:0 with session name  took 49.237935606s and succeeded
I0000 00:00:1719437231.367653   18965 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bccd2d18720c011f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17181828499583843901", property.function_library_fingerprint = 18155669256556234954, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 21:27:26.423982: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719437247.168110   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(29f6941eb34e8c44:0:0), session_name()
I0000 00:00:1719437254.171794   19012 tpu_compile_op_common.cc:245] Compilation of 29f6941eb34e8c44:0:0 with session name  took 7.003610993s and succeeded
I0000 00:00:1719437254.219516   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29f6941eb34e8c44:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_6896033447920655132", property.function_library_fingerprint = 1180103860293840079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/10


I0000 00:00:1719437255.017118   18957 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8d84d57fe0a3ab96:0:0), session_name()
I0000 00:00:1719437301.726215   18957 tpu_compile_op_common.cc:245] Compilation of 8d84d57fe0a3ab96:0:0 with session name  took 46.709046967s and succeeded
I0000 00:00:1719437301.828128   18957 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8d84d57fe0a3ab96:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17181828499583843901", property.function_library_fingerprint = 18155669256556234954, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7640517354011536 and accuracy: 0.8470982313156128


[I 2024-06-26 21:29:05,927] Trial 1372 finished with value: 0.8470982313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2489581806700751, 'weight_decay': 0.011576781310492229, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.5473167829451087}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:30:30.870961: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719437434.659040   18933 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c8cedf68ac219deb:0:0), session_name()
I0000 00:00:1719437484.573612   18933 tpu_compile_op_common.cc:245] Compilation of c8cedf68ac219deb:0:0 with session name  took 49.914502468s and succeeded
I0000 00:00:1719437484.687338   18933 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c8cedf68ac219deb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17075067831644953554", property.function_library_fingerprint = 4333998703896116750, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 21:31:40.229472: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719437500.934815   18977 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(77f44452ad2a6cf:0:0), session_name()
I0000 00:00:1719437507.916253   18977 tpu_compile_op_common.cc:245] Compilation of 77f44452ad2a6cf:0:0 with session name  took 6.981381474s and succeeded
I0000 00:00:1719437507.959381   18977 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(77f44452ad2a6cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12098304911541071820", property.function_library_fingerprint = 9973393827594778102, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/10


I0000 00:00:1719437508.709387   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a8ed6bea1f836090:0:0), session_name()
I0000 00:00:1719437555.523660   18971 tpu_compile_op_common.cc:245] Compilation of a8ed6bea1f836090:0:0 with session name  took 46.814210483s and succeeded
I0000 00:00:1719437555.649445   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a8ed6bea1f836090:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17075067831644953554", property.function_library_fingerprint = 4333998703896116750, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7740849852561951 and accuracy: 0.8404017686843872


[I 2024-06-26 21:33:18,778] Trial 1380 finished with value: 0.8404017686843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2421060483602097, 'weight_decay': 0.015004063661181195, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.7134537721082231}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


2024-06-26 21:34:46.288279: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719437690.059294   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b69d26fc50ddf21f:0:0), session_name()
I0000 00:00:1719437739.357086   18995 tpu_compile_op_common.cc:245] Compilation of b69d26fc50ddf21f:0:0 with session name  took 49.297705002s and succeeded
I0000 00:00:1719437739.475997   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b69d26fc50ddf21f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11634263876338473670", property.function_library_fingerprint = 13718671396961603251, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 21:35:54.170585: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719437754.877725   18991 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(61883b514d7b3d68:0:0), session_name()
I0000 00:00:1719437761.793553   18991 tpu_compile_op_common.cc:245] Compilation of 61883b514d7b3d68:0:0 with session name  took 6.915770081s and succeeded
I0000 00:00:1719437761.841774   18991 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(61883b514d7b3d68:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_9008663573854035145", property.function_library_fingerprint = 13066565559137654637, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/8


I0000 00:00:1719437762.643606   18977 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(40ecf84a4cbcb053:0:0), session_name()
I0000 00:00:1719437809.455644   18977 tpu_compile_op_common.cc:245] Compilation of 40ecf84a4cbcb053:0:0 with session name  took 46.811974632s and succeeded
I0000 00:00:1719437809.557544   18977 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(40ecf84a4cbcb053:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_11634263876338473670", property.function_library_fingerprint = 13718671396961603251, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7653951644897461 and accuracy: 0.7985491156578064


[I 2024-06-26 21:37:23,877] Trial 1388 finished with value: 0.7985491156578064 and parameters: {'num_epochs': 8, 'dropout_rate': 0.20027223812985015, 'weight_decay': 0.01532819361899346, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7325193935781021}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


2024-06-26 21:38:53.343077: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719437936.926022   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc00634a074ea2aa:0:0), session_name()
I0000 00:00:1719437989.254902   18967 tpu_compile_op_common.cc:245] Compilation of cc00634a074ea2aa:0:0 with session name  took 52.328812014s and succeeded
I0000 00:00:1719437989.387635   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc00634a074ea2aa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4750310378170686631", property.function_library_fingerprint = 1966662581181477191, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:40:04.750411: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719438005.461511   19020 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5cb1fb18f2a2c368:0:0), session_name()
I0000 00:00:1719438012.439331   19020 tpu_compile_op_common.cc:245] Compilation of 5cb1fb18f2a2c368:0:0 with session name  took 6.977674252s and succeeded
I0000 00:00:1719438012.480856   19020 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5cb1fb18f2a2c368:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_1906060230905920138", property.function_library_fingerprint = 253626897564270885, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.w

Epoch 2/9


I0000 00:00:1719438013.306002   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(23d2d06e7f21269:0:0), session_name()
I0000 00:00:1719438061.968063   18947 tpu_compile_op_common.cc:245] Compilation of 23d2d06e7f21269:0:0 with session name  took 48.661996066s and succeeded
I0000 00:00:1719438062.077630   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(23d2d06e7f21269:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4750310378170686631", property.function_library_fingerprint = 1966662581181477191, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_

Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7618260383605957 and accuracy: 0.8119419813156128


[I 2024-06-26 21:41:41,475] Trial 1396 finished with value: 0.8119419813156128 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22032551878693046, 'weight_decay': 0.04545490873800471, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.749974467314599}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:43:04.586703: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719438188.214584   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3fe62613e575457a:0:0), session_name()
I0000 00:00:1719438238.634856   18956 tpu_compile_op_common.cc:245] Compilation of 3fe62613e575457a:0:0 with session name  took 50.420206217s and succeeded
I0000 00:00:1719438238.747475   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3fe62613e575457a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8675585146615434991", property.function_library_fingerprint = 7973967429605639227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:44:13.589163: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719438254.337714   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a3f20121059aa736:0:0), session_name()
I0000 00:00:1719438261.244254   18947 tpu_compile_op_common.cc:245] Compilation of a3f20121059aa736:0:0 with session name  took 6.906473271s and succeeded
I0000 00:00:1719438261.300239   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a3f20121059aa736:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_1934644828151233537", property.function_library_fingerprint = 4487417392675341386, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/10


I0000 00:00:1719438262.074883   18973 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c4196e2b59f37497:0:0), session_name()
I0000 00:00:1719438309.164094   18973 tpu_compile_op_common.cc:245] Compilation of c4196e2b59f37497:0:0 with session name  took 47.089161179s and succeeded
I0000 00:00:1719438309.264140   18973 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c4196e2b59f37497:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_8675585146615434991", property.function_library_fingerprint = 7973967429605639227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7177592515945435 and accuracy: 0.8225446343421936


[I 2024-06-26 21:45:53,773] Trial 1404 finished with value: 0.8225446343421936 and parameters: {'num_epochs': 10, 'dropout_rate': 0.23141270871370662, 'weight_decay': 0.016631582929335552, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8331892704902388}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 21:47:15.638959: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719438439.526013   19021 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(df936837ad10e4a0:0:0), session_name()
I0000 00:00:1719438490.213785   19021 tpu_compile_op_common.cc:245] Compilation of df936837ad10e4a0:0:0 with session name  took 50.687701276s and succeeded
I0000 00:00:1719438490.320737   19021 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(df936837ad10e4a0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_207309612814726980", property.function_library_fingerprint = 11320393779382864458, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:48:25.545297: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719438506.325023   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e3b08f7764d84c84:0:0), session_name()
I0000 00:00:1719438513.385020   19012 tpu_compile_op_common.cc:245] Compilation of e3b08f7764d84c84:0:0 with session name  took 7.059919735s and succeeded
I0000 00:00:1719438513.429430   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3b08f7764d84c84:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_14246235426600582715", property.function_library_fingerprint = 12095555354545086461, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719438514.182080   18937 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(46819f800b1df69a:0:0), session_name()
I0000 00:00:1719438562.724314   18937 tpu_compile_op_common.cc:245] Compilation of 46819f800b1df69a:0:0 with session name  took 48.54217414s and succeeded
I0000 00:00:1719438562.833810   18937 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(46819f800b1df69a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_207309612814726980", property.function_library_fingerprint = 11320393779382864458, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7799256443977356 and accuracy: 0.8404017686843872


[I 2024-06-26 21:50:07,095] Trial 1412 finished with value: 0.8404017686843872 and parameters: {'num_epochs': 10, 'dropout_rate': 0.20957783288438817, 'weight_decay': 0.009020885680428798, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9010543016975678}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 21:51:29.923642: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719438693.510218   19012 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c3613a94429dcd53:0:0), session_name()
I0000 00:00:1719438744.172907   19012 tpu_compile_op_common.cc:245] Compilation of c3613a94429dcd53:0:0 with session name  took 50.662597589s and succeeded
I0000 00:00:1719438744.279581   19012 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3613a94429dcd53:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1379437814342292974", property.function_library_fingerprint = 6476318062196427181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:52:39.677047: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719438760.436961   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9ee74546c7ab5bd6:0:0), session_name()
I0000 00:00:1719438767.441218   19008 tpu_compile_op_common.cc:245] Compilation of 9ee74546c7ab5bd6:0:0 with session name  took 7.004192126s and succeeded
I0000 00:00:1719438767.483370   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ee74546c7ab5bd6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_2166980609989202157", property.function_library_fingerprint = 13178190580928071867, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719438768.239724   18947 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8caa25fd85549080:0:0), session_name()
I0000 00:00:1719438815.312284   18947 tpu_compile_op_common.cc:245] Compilation of 8caa25fd85549080:0:0 with session name  took 47.072497931s and succeeded
I0000 00:00:1719438815.419364   18947 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8caa25fd85549080:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1379437814342292974", property.function_library_fingerprint = 6476318062196427181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7918738722801208 and accuracy: 0.8521205186843872


2024-06-26 21:54:31.669317: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719438872.361710   18953 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8f3273e6d39ebdfb:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719438879.005085   18953 tpu_compile_op_common.cc:245] Compilation of 8f3273e6d39ebdfb:0:0 with session name  took 6.643296298s and succeeded
I0000 00:00:1719438879.046125   18953 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8f3273e6d39ebdfb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5751219752685140027", property.function_library_fingerprint = 14555667680135393109, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719438879.046195   18953 tpu_compilation_cache_interface.cc:541] After adding entry for key 8f3273e6d39ebdfb



I0000 00:00:1719438880.262116   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(32b4dbbf3bbf5a78:0:0), session_name()




I0000 00:00:1719438886.197255   18971 tpu_compile_op_common.cc:245] Compilation of 32b4dbbf3bbf5a78:0:0 with session name  took 5.935080903s and succeeded
I0000 00:00:1719438886.233509   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(32b4dbbf3bbf5a78:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5751219752685140027", property.function_library_fingerprint = 14555667680135393109, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719438886.233564   18971 tpu_compilation_cache_interface.cc:541] After adding entry for key 32b4dbbf3bbf5a78

Epoch 1/4
Epoch 2/4


I0000 00:00:1719438888.118899   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(84a2fb4d9cfde353:0:0), session_name()
I0000 00:00:1719438936.924349   18931 tpu_compile_op_common.cc:245] Compilation of 84a2fb4d9cfde353:0:0 with session name  took 48.805377287s and succeeded
I0000 00:00:1719438937.038611   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(84a2fb4d9cfde353:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1379437814342292974", property.function_library_fingerprint = 6476318062196427181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partitio

Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_1013_accuracy_0.8488_avg_score_0.8059_f1_0.7630_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_1013_accuracy_0.8488_avg_score_0.8059_f1_0.7630_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 21:55:43,086] Trial 1419 finished with value: 0.8521205186843872 and parameters: {'num_epochs': 11, 'dropout_rate': 0.20789719965894293, 'weight_decay': 0.018667515136372166, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7848472911409052}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 21:57:07.476543: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719439031.048101   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3386f71612c714e0:0:0), session_name()
I0000 00:00:1719439082.092649   18995 tpu_compile_op_common.cc:245] Compilation of 3386f71612c714e0:0:0 with session name  took 51.044472308s and succeeded
I0000 00:00:1719439082.215668   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3386f71612c714e0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5723043736185205836", property.function_library_fingerprint = 5479347031063486576, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 21:58:17.233886: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719439097.939036   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1446aeb5eb96362f:0:0), session_name()
I0000 00:00:1719439105.083898   18931 tpu_compile_op_common.cc:245] Compilation of 1446aeb5eb96362f:0:0 with session name  took 7.144780979s and succeeded
I0000 00:00:1719439105.118982   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1446aeb5eb96362f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_13475717520108426200", property.function_library_fingerprint = 15365443485086742083, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/11


I0000 00:00:1719439105.884722   18938 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2fd3a2152454d97:0:0), session_name()
I0000 00:00:1719439153.847765   18938 tpu_compile_op_common.cc:245] Compilation of 2fd3a2152454d97:0:0 with session name  took 47.962983884s and succeeded
I0000 00:00:1719439153.980189   18938 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2fd3a2152454d97:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_5723043736185205836", property.function_library_fingerprint = 5479347031063486576, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7719041109085083 and accuracy: 0.83984375


[I 2024-06-26 22:00:02,556] Trial 1428 finished with value: 0.83984375 and parameters: {'num_epochs': 11, 'dropout_rate': 0.18823424281868906, 'weight_decay': 0.019865054318331035, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7964810561668464}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 22:01:24.195821: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719439287.886349   18937 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3038870b25aca329:0:0), session_name()
I0000 00:00:1719439338.501092   18937 tpu_compile_op_common.cc:245] Compilation of 3038870b25aca329:0:0 with session name  took 50.614667836s and succeeded
I0000 00:00:1719439338.624850   18937 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3038870b25aca329:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17823125376754139239", property.function_library_fingerprint = 2776590578125534567, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:02:34.296781: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719439355.016770   18958 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e588c6977b98f7a5:0:0), session_name()
I0000 00:00:1719439361.962181   18958 tpu_compile_op_common.cc:245] Compilation of e588c6977b98f7a5:0:0 with session name  took 6.945361413s and succeeded
I0000 00:00:1719439362.001382   18958 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e588c6977b98f7a5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10543802434781285403", property.function_library_fingerprint = 6040705771926483444, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/12


I0000 00:00:1719439362.776033   18938 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(43ef96efafa0a21f:0:0), session_name()
I0000 00:00:1719439411.266607   18938 tpu_compile_op_common.cc:245] Compilation of 43ef96efafa0a21f:0:0 with session name  took 48.490494177s and succeeded
I0000 00:00:1719439411.387702   18938 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(43ef96efafa0a21f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_17823125376754139239", property.function_library_fingerprint = 2776590578125534567, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7800949215888977 and accuracy: 0.8459821343421936


[I 2024-06-26 22:04:23,116] Trial 1435 finished with value: 0.8459821343421936 and parameters: {'num_epochs': 12, 'dropout_rate': 0.198848365010933, 'weight_decay': 0.020908830171405778, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.4136795947646433}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:05:47.176690: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719439550.802642   18939 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a22f6f9ae147a1c0:0:0), session_name()
I0000 00:00:1719439601.968307   18939 tpu_compile_op_common.cc:245] Compilation of a22f6f9ae147a1c0:0:0 with session name  took 51.165605044s and succeeded
I0000 00:00:1719439602.084047   18939 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a22f6f9ae147a1c0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15421133589924745041", property.function_library_fingerprint = 6010827992858563799, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:06:57.132949: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719439617.865686   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc49d75044d2857f:0:0), session_name()
I0000 00:00:1719439625.077798   19011 tpu_compile_op_common.cc:245] Compilation of cc49d75044d2857f:0:0 with session name  took 7.212032057s and succeeded
I0000 00:00:1719439625.125524   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc49d75044d2857f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15494168720317424452", property.function_library_fingerprint = 5787737219829993248, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719439625.908566   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4d82c33e3bdf688:0:0), session_name()
I0000 00:00:1719439675.620719   18971 tpu_compile_op_common.cc:245] Compilation of 4d82c33e3bdf688:0:0 with session name  took 49.712081644s and succeeded
I0000 00:00:1719439675.762299   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4d82c33e3bdf688:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15421133589924745041", property.function_library_fingerprint = 6010827992858563799, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7732509970664978 and accuracy: 0.8348214030265808


[I 2024-06-26 22:08:44,773] Trial 1443 finished with value: 0.8348214030265808 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2002740968817328, 'weight_decay': 0.018251359536946104, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8220501390748756}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:10:07.331746: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719439811.022778   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(44196a19b55e569e:0:0), session_name()
I0000 00:00:1719439863.834691   19008 tpu_compile_op_common.cc:245] Compilation of 44196a19b55e569e:0:0 with session name  took 52.811843199s and succeeded
I0000 00:00:1719439863.966169   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(44196a19b55e569e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1490684365989523519", property.function_library_fingerprint = 16873637140512678874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:11:20.403188: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719439881.146061   19017 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b35082a778f91a5d:0:0), session_name()
I0000 00:00:1719439888.077776   19017 tpu_compile_op_common.cc:245] Compilation of b35082a778f91a5d:0:0 with session name  took 6.931673822s and succeeded
I0000 00:00:1719439888.122312   19017 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b35082a778f91a5d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10178691296680544607", property.function_library_fingerprint = 4516238542658777352, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719439888.907631   18948 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3dad0d80bb92e6c3:0:0), session_name()
I0000 00:00:1719439937.592891   18948 tpu_compile_op_common.cc:245] Compilation of 3dad0d80bb92e6c3:0:0 with session name  took 48.685199905s and succeeded
I0000 00:00:1719439937.717090   18948 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3dad0d80bb92e6c3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_1490684365989523519", property.function_library_fingerprint = 16873637140512678874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7906361818313599 and accuracy: 0.8443080186843872


[I 2024-06-26 22:13:05,600] Trial 1451 finished with value: 0.8443080186843872 and parameters: {'num_epochs': 11, 'dropout_rate': 0.17839938023159935, 'weight_decay': 0.01700855281490907, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7889007578708982}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:14:29.225946: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719440072.947702   18971 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4ff0992a503fc547:0:0), session_name()
I0000 00:00:1719440123.541025   18971 tpu_compile_op_common.cc:245] Compilation of 4ff0992a503fc547:0:0 with session name  took 50.59324066s and succeeded
I0000 00:00:1719440123.645491   18971 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4ff0992a503fc547:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18240681676132313676", property.function_library_fingerprint = 2131853057379705295, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 22:15:38.520621: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719440139.232347   19004 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2268ea1dfcab81b6:0:0), session_name()
I0000 00:00:1719440146.161437   19004 tpu_compile_op_common.cc:245] Compilation of 2268ea1dfcab81b6:0:0 with session name  took 6.929035728s and succeeded
I0000 00:00:1719440146.205703   19004 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2268ea1dfcab81b6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4698421003356091839", property.function_library_fingerprint = 3740781059682469870, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719440146.975756   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f261831b1d13dd1a:0:0), session_name()
I0000 00:00:1719440195.699523   18967 tpu_compile_op_common.cc:245] Compilation of f261831b1d13dd1a:0:0 with session name  took 48.723698309s and succeeded
I0000 00:00:1719440195.833269   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f261831b1d13dd1a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18240681676132313676", property.function_library_fingerprint = 2131853057379705295, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7267701029777527 and accuracy: 0.7985491156578064


[I 2024-06-26 22:17:24,442] Trial 1459 finished with value: 0.7985491156578064 and parameters: {'num_epochs': 11, 'dropout_rate': 0.17371818693675423, 'weight_decay': 0.018716795325126327, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8321608488346754}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 22:18:44.370155: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719440328.145372   18952 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(19b52e3ffc795bb8:0:0), session_name()
I0000 00:00:1719440378.330514   18952 tpu_compile_op_common.cc:245] Compilation of 19b52e3ffc795bb8:0:0 with session name  took 50.18505591s and succeeded
I0000 00:00:1719440378.469817   18952 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(19b52e3ffc795bb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7106523081118028782", property.function_library_fingerprint = 10033364356521988420, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 22:19:53.183236: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719440393.925598   18966 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cffcbac9bbcbba24:0:0), session_name()
I0000 00:00:1719440400.876548   18966 tpu_compile_op_common.cc:245] Compilation of cffcbac9bbcbba24:0:0 with session name  took 6.950891838s and succeeded
I0000 00:00:1719440400.924823   18966 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cffcbac9bbcbba24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_12348829546016691640", property.function_library_fingerprint = 8464487324554713058, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/12


I0000 00:00:1719440401.712369   18973 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(af93c0075ca9932:0:0), session_name()
I0000 00:00:1719440448.332164   18973 tpu_compile_op_common.cc:245] Compilation of af93c0075ca9932:0:0 with session name  took 46.619735559s and succeeded
I0000 00:00:1719440448.456535   18973 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af93c0075ca9932:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_7106523081118028782", property.function_library_fingerprint = 10033364356521988420, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.793457567691803 and accuracy: 0.8387276530265808


[I 2024-06-26 22:21:42,004] Trial 1467 finished with value: 0.8387276530265808 and parameters: {'num_epochs': 12, 'dropout_rate': 0.16596710148471358, 'weight_decay': 0.02345024168747522, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8587312565758358}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:23:03.269759: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719440586.856737   19016 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2348c46d557361be:0:0), session_name()
I0000 00:00:1719440636.796213   19016 tpu_compile_op_common.cc:245] Compilation of 2348c46d557361be:0:0 with session name  took 49.939410679s and succeeded
I0000 00:00:1719440636.927083   19016 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2348c46d557361be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15559235852749564458", property.function_library_fingerprint = 1914005674402104944, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:24:12.110544: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719440652.815954   19023 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(37eadad5c10ee13e:0:0), session_name()
I0000 00:00:1719440660.134359   19023 tpu_compile_op_common.cc:245] Compilation of 37eadad5c10ee13e:0:0 with session name  took 7.318339564s and succeeded
I0000 00:00:1719440660.185192   19023 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(37eadad5c10ee13e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_5859811662305190606", property.function_library_fingerprint = 4868061870123453629, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/11


I0000 00:00:1719440660.957083   18987 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7a38402e79a064de:0:0), session_name()
I0000 00:00:1719440710.148093   18987 tpu_compile_op_common.cc:245] Compilation of 7a38402e79a064de:0:0 with session name  took 49.1909312s and succeeded
I0000 00:00:1719440710.275880   18987 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7a38402e79a064de:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_15559235852749564458", property.function_library_fingerprint = 1914005674402104944, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7690972089767456 and accuracy: 0.8219866156578064


[I 2024-06-26 22:25:57,884] Trial 1475 finished with value: 0.8219866156578064 and parameters: {'num_epochs': 11, 'dropout_rate': 0.1788758568001354, 'weight_decay': 0.03772116991756511, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.8040027751389401}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 22:27:21.615457: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719440845.083141   18940 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(93aae792a8a006f5:0:0), session_name()
I0000 00:00:1719440895.087597   18940 tpu_compile_op_common.cc:245] Compilation of 93aae792a8a006f5:0:0 with session name  took 50.004377567s and succeeded
I0000 00:00:1719440895.191671   18940 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(93aae792a8a006f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3694129520702226317", property.function_library_fingerprint = 18234263735789783316, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:28:29.483223: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719440910.226665   18967 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(95490d3c3a75d10e:0:0), session_name()
I0000 00:00:1719440917.099829   18967 tpu_compile_op_common.cc:245] Compilation of 95490d3c3a75d10e:0:0 with session name  took 6.873085734s and succeeded
I0000 00:00:1719440917.135490   18967 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(95490d3c3a75d10e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_3953090970978865155", property.function_library_fingerprint = 15511024026103694954, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/10


I0000 00:00:1719440917.922594   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d2476a48509085b4:0:0), session_name()
I0000 00:00:1719440963.706618   18956 tpu_compile_op_common.cc:245] Compilation of d2476a48509085b4:0:0 with session name  took 45.783938868s and succeeded
I0000 00:00:1719440963.808311   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2476a48509085b4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3694129520702226317", property.function_library_fingerprint = 18234263735789783316, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7690590023994446 and accuracy: 0.8498883843421936


2024-06-26 22:30:16.337469: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1719441017.077528   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d868ef34a05af839:0:0), session_name()


 3/13 [=====>........................] - ETA: 0s  

I0000 00:00:1719441023.623079   19011 tpu_compile_op_common.cc:245] Compilation of d868ef34a05af839:0:0 with session name  took 6.545472966s and succeeded
I0000 00:00:1719441023.666892   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d868ef34a05af839:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11628817007228098097", property.function_library_fingerprint = 11628617098728578297, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719441023.666953   19011 tpu_compilation_cache_interface.cc:541] After adding entry for key d868ef34a05af83



I0000 00:00:1719441024.853218   18955 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eb6ba76a4240bf2a:0:0), session_name()




I0000 00:00:1719441030.846771   18955 tpu_compile_op_common.cc:245] Compilation of eb6ba76a4240bf2a:0:0 with session name  took 5.993482062s and succeeded
I0000 00:00:1719441030.891364   18955 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eb6ba76a4240bf2a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11628817007228098097", property.function_library_fingerprint = 11628617098728578297, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,64,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1719441030.891433   18955 tpu_compilation_cache_interface.cc:541] After adding entry for key eb6ba76a4240bf2

Epoch 1/3
Epoch 2/3


I0000 00:00:1719441032.733100   19022 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3f9d0b54ca1ac871:0:0), session_name()
I0000 00:00:1719441079.248484   19022 tpu_compile_op_common.cc:245] Compilation of 3f9d0b54ca1ac871:0:0 with session name  took 46.515319126s and succeeded
I0000 00:00:1719441079.349334   19022 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3f9d0b54ca1ac871:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3694129520702226317", property.function_library_fingerprint = 18234263735789783316, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,64,;14,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/3
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_759_accuracy_0.8488_avg_score_0.8036_f1_0.7584_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_tfrecord_BERT_1_model_trial_759_accuracy_0.8488_avg_score_0.8036_f1_0.7584_post_fine_tuning_submission.csv has been removed.


[I 2024-06-26 22:31:24,280] Trial 1483 finished with value: 0.8498883843421936 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2067011882196325, 'weight_decay': 0.02070085713955553, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7774570512554616}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 22:32:46.730323: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719441170.402851   18945 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fbd16876464d07d6:0:0), session_name()
I0000 00:00:1719441220.426876   18945 tpu_compile_op_common.cc:245] Compilation of fbd16876464d07d6:0:0 with session name  took 50.023972233s and succeeded
I0000 00:00:1719441220.530079   18945 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fbd16876464d07d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18045078613356232307", property.function_library_fingerprint = 16767728673405243292, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 22:33:55.026714: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719441235.775439   18988 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2e362ee72a093f1:0:0), session_name()
I0000 00:00:1719441242.662065   18988 tpu_compile_op_common.cc:245] Compilation of 2e362ee72a093f1:0:0 with session name  took 6.886558788s and succeeded
I0000 00:00:1719441242.701002   18988 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2e362ee72a093f1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_9413808062655422114", property.function_library_fingerprint = 10586040646981166707, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/10


I0000 00:00:1719441243.458482   19025 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(579d4eb51be8eb1:0:0), session_name()
I0000 00:00:1719441291.267484   19025 tpu_compile_op_common.cc:245] Compilation of 579d4eb51be8eb1:0:0 with session name  took 47.808928838s and succeeded
I0000 00:00:1719441291.393893   19025 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(579d4eb51be8eb1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_18045078613356232307", property.function_library_fingerprint = 16767728673405243292, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partition

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7625345587730408 and accuracy: 0.84375


[I 2024-06-26 22:35:34,623] Trial 1493 finished with value: 0.84375 and parameters: {'num_epochs': 10, 'dropout_rate': 0.21326625494481966, 'weight_decay': 0.049235677049756794, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.765170794257781}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:37:01.633443: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719441425.478179   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(944737459385f382:0:0), session_name()
I0000 00:00:1719441474.373700   19008 tpu_compile_op_common.cc:245] Compilation of 944737459385f382:0:0 with session name  took 48.895430068s and succeeded
I0000 00:00:1719441474.478573   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(944737459385f382:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2311307587479774015", property.function_library_fingerprint = 17781530009989563699, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:38:09.797528: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719441490.571247   18972 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f9172c817f2be74b:0:0), session_name()
I0000 00:00:1719441497.692966   18972 tpu_compile_op_common.cc:245] Compilation of f9172c817f2be74b:0:0 with session name  took 7.121630555s and succeeded
I0000 00:00:1719441497.730922   18972 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f9172c817f2be74b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_16564926325181781397", property.function_library_fingerprint = 18181708131874782512, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/11


I0000 00:00:1719441498.544241   18949 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(930f6ed92dcb6d71:0:0), session_name()
I0000 00:00:1719441545.304813   18949 tpu_compile_op_common.cc:245] Compilation of 930f6ed92dcb6d71:0:0 with session name  took 46.760506062s and succeeded
I0000 00:00:1719441545.405373   18949 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(930f6ed92dcb6d71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_2311307587479774015", property.function_library_fingerprint = 17781530009989563699, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7899892926216125 and accuracy: 0.8404017686843872


[I 2024-06-26 22:39:54,305] Trial 1501 finished with value: 0.8404017686843872 and parameters: {'num_epochs': 11, 'dropout_rate': 0.19113138673149288, 'weight_decay': 0.016513819487814344, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8152902180564671}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


2024-06-26 22:41:17.176615: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719441680.773314   19008 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f63913143f227dbd:0:0), session_name()
I0000 00:00:1719441729.969602   19008 tpu_compile_op_common.cc:245] Compilation of f63913143f227dbd:0:0 with session name  took 49.196199947s and succeeded
I0000 00:00:1719441730.074304   19008 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f63913143f227dbd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3348252259421214459", property.function_library_fingerprint = 15807388145529034203, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1,



2024-06-26 22:42:24.668794: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719441745.357443   18949 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a1d5901b410f5be8:0:0), session_name()
I0000 00:00:1719441752.445519   18949 tpu_compile_op_common.cc:245] Compilation of a1d5901b410f5be8:0:0 with session name  took 7.088017577s and succeeded
I0000 00:00:1719441752.490556   18949 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a1d5901b410f5be8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4292339392412235889", property.function_library_fingerprint = 17913052043101693284, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology

Epoch 2/11


I0000 00:00:1719441753.236083   18992 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8a5b5cf2af49a54a:0:0), session_name()
I0000 00:00:1719441801.109486   18992 tpu_compile_op_common.cc:245] Compilation of 8a5b5cf2af49a54a:0:0 with session name  took 47.873323041s and succeeded
I0000 00:00:1719441801.238723   18992 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8a5b5cf2af49a54a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_3348252259421214459", property.function_library_fingerprint = 15807388145529034203, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partiti

Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7907066345214844 and accuracy: 0.8487723469734192


[I 2024-06-26 22:44:09,141] Trial 1509 finished with value: 0.8487723469734192 and parameters: {'num_epochs': 11, 'dropout_rate': 0.19686403469913627, 'weight_decay': 0.04184016478698852, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3672228984542409}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2


2024-06-26 22:45:34.479981: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719441937.986506   18963 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8619212be847e255:0:0), session_name()
I0000 00:00:1719441986.390122   18963 tpu_compile_op_common.cc:245] Compilation of 8619212be847e255:0:0 with session name  took 48.403559121s and succeeded
I0000 00:00:1719441986.493315   18963 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8619212be847e255:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4197759246215380645", property.function_library_fingerprint = 1516860054102541450, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, 



2024-06-26 22:46:41.064659: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719442001.835138   19025 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2944efb5c5f529a2:0:0), session_name()
I0000 00:00:1719442008.675439   19025 tpu_compile_op_common.cc:245] Compilation of 2944efb5c5f529a2:0:0 with session name  took 6.84023816s and succeeded
I0000 00:00:1719442008.727291   19025 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2944efb5c5f529a2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_10602774164278779540", property.function_library_fingerprint = 6818108965041710292, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.

Epoch 2/2


I0000 00:00:1719442009.461399   19011 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(71c6d834de39b905:0:0), session_name()
I0000 00:00:1719442056.119285   19011 tpu_compile_op_common.cc:245] Compilation of 71c6d834de39b905:0:0 with session name  took 46.657824907s and succeeded
I0000 00:00:1719442056.254614   19011 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(71c6d834de39b905:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_4197759246215380645", property.function_library_fingerprint = 1516860054102541450, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitio

f1 score: 0.5938652753829956 and accuracy: 0.8091517686843872


[I 2024-06-26 22:47:43,335] Trial 1517 finished with value: 0.8091517686843872 and parameters: {'num_epochs': 2, 'dropout_rate': 0.2020766849555769, 'weight_decay': 0.01829318061359868, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.39022835673778705}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


2024-06-26 22:49:09.142918: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719442152.541635   18948 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fcc21f13b444e0c5:0:0), session_name()
I0000 00:00:1719442203.192595   18948 tpu_compile_op_common.cc:245] Compilation of fcc21f13b444e0c5:0:0 with session name  took 50.650882942s and succeeded
I0000 00:00:1719442203.332905   18948 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcc21f13b444e0c5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12910534826511721475", property.function_library_fingerprint = 18436871734388783468, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1



2024-06-26 22:50:18.582494: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719442219.281057   18972 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d2444e2dc09f1ab7:0:0), session_name()
I0000 00:00:1719442226.250461   18972 tpu_compile_op_common.cc:245] Compilation of d2444e2dc09f1ab7:0:0 with session name  took 6.969334887s and succeeded
I0000 00:00:1719442226.298175   18972 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2444e2dc09f1ab7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_15120052925695825583", property.function_library_fingerprint = 18328571113037275670, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog

Epoch 2/10


I0000 00:00:1719442227.099662   18956 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f19a2159dd92e2fc:0:0), session_name()
I0000 00:00:1719442275.080900   18956 tpu_compile_op_common.cc:245] Compilation of f19a2159dd92e2fc:0:0 with session name  took 47.981159612s and succeeded
I0000 00:00:1719442275.208605   18956 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f19a2159dd92e2fc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_12910534826511721475", property.function_library_fingerprint = 18436871734388783468, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partit

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7237737774848938 and accuracy: 0.8448660969734192


[I 2024-06-26 22:51:58,135] Trial 1523 finished with value: 0.8448660969734192 and parameters: {'num_epochs': 10, 'dropout_rate': 0.3549634325293495, 'weight_decay': 0.02209628530512266, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.796319316838797}. Best is trial 812 with value: 0.8565848469734192.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


2024-06-26 22:53:19.417640: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AdamW/AssignAddVariableOp.
I0000 00:00:1719442402.756436   18931 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4664c535c513832c:0:0), session_name()
I0000 00:00:1719442452.423584   18931 tpu_compile_op_common.cc:245] Compilation of 4664c535c513832c:0:0 with session name  took 49.667061876s and succeeded
I0000 00:00:1719442452.556319   18931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4664c535c513832c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_407517389302364920", property.function_library_fingerprint = 365059460586657347, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, to



2024-06-26 22:54:27.042722: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.
I0000 00:00:1719442467.814300   18995 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a3ad8be1d99d4ad:0:0), session_name()
I0000 00:00:1719442474.850136   18995 tpu_compile_op_common.cc:245] Compilation of a3ad8be1d99d4ad:0:0 with session name  took 7.035764371s and succeeded
I0000 00:00:1719442474.890200   18995 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a3ad8be1d99d4ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_test_function_4864004641508738385", property.function_library_fingerprint = 16750746830751999311, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wr

Epoch 2/12


I0000 00:00:1719442475.684300   19007 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2eca367c95d24d18:0:0), session_name()
I0000 00:00:1719442522.794414   19007 tpu_compile_op_common.cc:245] Compilation of 2eca367c95d24d18:0:0 with session name  took 47.110048518s and succeeded
I0000 00:00:1719442522.925953   19007 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2eca367c95d24d18:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_train_function_407517389302364920", property.function_library_fingerprint = 365059460586657347, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,64,;10,;", property.guaranteed_constants_size = 0, embedding_partitions

Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12