# This notebook tests out if the results are better if I predict on models before or after fine-tuning on the validation data. I found that for this notebook specifically (not using .tfrecord), the pre-fine-tuning was better. So fine-tuning was a waste of time here. But when I switched to .tfrecord in the next notebook, I found that fine-tuning actually helped score

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nlp-getting-started/sample_submission.csv
/kaggle/input/nlp-getting-started/train.csv
/kaggle/input/nlp-getting-started/test.csv
/kaggle/input/certification/BaltimoreCyberTrustRoot.crt.pem


In [None]:
import numpy as np
import pandas as pd
import random
import os
import re
import json
from transformers import set_seed, BertTokenizer, TFBertForSequenceClassification, BertConfig
import tensorflow as tf
%pip install optuna
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
set_seed(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Install necessary packages for Azure SQL connection
%pip install mysql-connector-python 
%pip install PyMySQL

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress other warnings (commented out for now)
# warnings.filterwarnings('ignore')

# Suppress absl TPU cache logging
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Additional configuration to suppress specific TPU cache logs
tf.autograph.set_verbosity(3)
tf.get_logger().propagate = False

# Load the training data
train_data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')
kaggle_test_data = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Split the data into 75% training and 25% validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42, stratify=train_data['target'])

# Clean the text data
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)     # Remove mentions
    text = re.sub(r'\d+', '', text)      # Remove numbers
    text = re.sub(r'[^\w\s#]', '', text)  # Remove punctuation except hashtags
    text = text.lower()                  # Convert to lowercase
    return text

train_data['clean_text'] = train_data['text'].apply(clean_text)
val_data['clean_text'] = val_data['text'].apply(clean_text)
kaggle_test_data['clean_text'] = kaggle_test_data['text'].apply(clean_text)

# Tokenize the text data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=64,
        padding=True,
        truncation=True,
        return_tensors='tf'
    )

# Encode the clean text data
train_encodings = tokenize_texts(train_data['clean_text'])
val_encodings = tokenize_texts(val_data['clean_text'])
kaggle_test_encodings = tokenize_texts(kaggle_test_data['clean_text'])

train_labels = tf.convert_to_tensor(train_data['target'].values)
val_labels = tf.convert_to_tensor(val_data['target'].values)

def compute_metrics(predictions, labels):
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(labels, predictions)
    accuracy = accuracy_score(labels, predictions)
    return {'f1': f1, 'accuracy': accuracy}

def create_tf_dataset(encodings, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((encodings, labels))
    dataset = dataset.cache()  # Cache the dataset
    dataset = dataset.shuffle(10000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)  # Prefetching
    return dataset

# Initialize TPU
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    tpu_cores = strategy.num_replicas_in_sync
    print(f"TPU cores available: {tpu_cores}")
except ValueError:
    print("TPU not found")
    raise SystemExit

# Directory to save models
model_save_dir = './saved_models'
os.makedirs(model_save_dir, exist_ok=True)

# File to store top 5 model predictions
top_predictions_file = 'top_5_predictions.json'

# Load existing top 5 predictions
if os.path.exists(top_predictions_file):
    with open(top_predictions_file, 'r') as file:
        top_predictions = json.load(file)
else:
    top_predictions = []

# Function to save top predictions
def save_top_predictions(pre_fine_tuning_file, post_fine_tuning_file, val_accuracy, model_number):
    global top_predictions

    new_entry = {
        'model_number': model_number,
        'val_accuracy': val_accuracy,
        'pre_fine_tuning_file': pre_fine_tuning_file,
        'post_fine_tuning_file': post_fine_tuning_file
    }

    # Add the new entry and sort by validation accuracy
    top_predictions.append(new_entry)
    top_predictions = sorted(top_predictions, key=lambda x: x['val_accuracy'], reverse=True)

    # If there are more than 5 entries, remove the one with the lowest accuracy
    if len(top_predictions) > 5:
        removed_entry = top_predictions.pop()
        # Check if the files exist before attempting to remove them
        if os.path.exists(removed_entry['pre_fine_tuning_file']):
            os.remove(removed_entry['pre_fine_tuning_file'])
            print(f"File {removed_entry['pre_fine_tuning_file']} has been removed.")
        else:
            print(f"File {removed_entry['pre_fine_tuning_file']} does not exist and cannot be removed.")
        
        if os.path.exists(removed_entry['post_fine_tuning_file']):
            os.remove(removed_entry['post_fine_tuning_file'])
            print(f"File {removed_entry['post_fine_tuning_file']} has been removed.")
        else:
            print(f"File {removed_entry['post_fine_tuning_file']} does not exist and cannot be removed.")

    # Save the updated top predictions to file
    with open(top_predictions_file, 'w') as file:
        json.dump(top_predictions, file, indent=4)

# Set fixed batch size and learning rate parameters
base_learning_rate = 1e-5
batch_size_per_core = 32
tpu_cores = 8
batch_size = batch_size_per_core * tpu_cores
learning_rate = base_learning_rate * (batch_size / (batch_size_per_core * tpu_cores))

# Create the datasets outside the objective function
train_dataset = create_tf_dataset(dict(train_encodings), train_labels, batch_size)
val_dataset = create_tf_dataset(dict(val_encodings), val_labels, batch_size)
kaggle_test_dataset = tf.data.Dataset.from_tensor_slices(dict(kaggle_test_encodings)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

fine_tune_encodings = tokenize_texts(val_data['clean_text'])
fine_tune_labels = tf.convert_to_tensor(val_data['target'].values)
fine_tune_dataset = tf.data.Dataset.from_tensor_slices((
    dict(fine_tune_encodings),
    fine_tune_labels
)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

# Track top 5 models
def objective(trial):
    num_epochs = trial.suggest_int("num_epochs", 1, 20)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    lr_scheduler_type = trial.suggest_categorical("lr_scheduler_type", ["constant", "linear", "cosine", "cosine_with_restarts"])
    gradient_clip_norm = trial.suggest_float("gradient_clip_norm", 0.0, 1.0)

    with strategy.scope():
        precision = tf.keras.metrics.Precision()
        recall = tf.keras.metrics.Recall()

        def f1_score_custom(y_true, y_pred):
            # Convert logits to predicted labels
            y_pred = tf.argmax(y_pred, axis=1)
            
            # Ensure true labels are in integer format
            y_true = tf.cast(y_true, tf.int64)
            
            # Update the state of precision and recall
            precision.update_state(y_true, y_pred)
            recall.update_state(y_true, y_pred)
            
            # Compute precision and recall values
            precision_result = precision.result()
            recall_result = recall.result()
            
            # Compute F1 score
            f1 = 2 * ((precision_result * recall_result) / (precision_result + recall_result + tf.keras.backend.epsilon()))
            
            return f1

        config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2, hidden_dropout_prob=dropout_rate)
        model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

        if lr_scheduler_type == "linear":
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000,
                end_learning_rate=0.0,
                power=1.0
            )
        elif lr_scheduler_type == "cosine":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000
            )
        elif lr_scheduler_type == "cosine_with_restarts":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                initial_learning_rate=learning_rate,
                first_decay_steps=1000
            )
        else:
            lr_schedule = learning_rate

        optimizer = tf.keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            epsilon=1e-8,
            clipnorm=gradient_clip_norm
        )

        model.compile(
            optimizer=optimizer, 
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy'), f1_score_custom],
            steps_per_execution=32  # Experiment with different values like 16, 32, 64
        )

    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)

    # Evaluate on validation set
    val_loss, val_accuracy, val_f1_score = model.evaluate(val_dataset, verbose=1)
    print(f"f1 score: {val_f1_score} and accuracy: {val_accuracy}")
    
    avg_score = (val_accuracy + val_f1_score) / 2

    if len(top_predictions) < 5 or val_accuracy > min(top_predictions, key=lambda x: x['val_accuracy'])['val_accuracy']:
        # Pre-fine-tuning predictions
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()
        pre_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_pre_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(pre_fine_tuning_predictions_file, index=False)

        # Fine-tuning step
        training_data_size = len(train_data)
        fine_tune_data_size = len(val_data)
        fine_tune_epochs = max(1, round((fine_tune_data_size / training_data_size) * num_epochs))

        model.fit(fine_tune_dataset, epochs=fine_tune_epochs, verbose=1)

        # Post-fine-tuning predictions
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()
        post_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_post_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(post_fine_tuning_predictions_file, index=False)

        # Save the predictions and accuracy to the top 5 list
        save_top_predictions(pre_fine_tuning_predictions_file, post_fine_tuning_predictions_file, val_accuracy, trial.number)

    return val_accuracy

# Define your Optuna study, using the MySQL connection string
optuna_storage = 'mysql+pymysql://<username>:<password>@<host>/<database>?ssl_ca=<path_to_CA_cert>&ssl_verify_cert=true'

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
db_password = user_secrets.get_secret("DB_PASSWORD")# This uses the secrets inside of Kaggle so I don't have to explicitly type my password out in code

# Example with your details (replace '<password>' with your real password and '<database>' with your database name)
optuna_storage = f'mysql+pymysql://MichaelAzure:{db_password}@kaggle-third-sql.mysql.database.azure.com/kaggle_disaster_database?ssl_ca=/kaggle/input/certification&ssl_verify_cert=true'

studyName = 'disaster_BERT_prePost_0'
study = optuna.create_study(study_name=studyName, # name of the study
                            storage=optuna_storage,  # URL for the mySQL schema
                            direction='maximize', # maximize the log loss
                            load_if_exists=True, # makes it so that if the study_name already exists in the schema, then it will append the new trials with the old trials and essentially resume the study. It will also remember the previous trials so it really is resuming the study
                            )

study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save top predictions JSON file to output directory for later use
output_top_predictions_file = '/kaggle/working/top_5_predictions.json'
with open(output_top_predictions_file, 'w') as file:
    json.dump(top_predictions, file, indent=4)

print(f"Top 5 predictions saved to {output_top_predictions_file}")


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1718898334.304790   10776 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="localhost"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:479
D0620 15:45:34.313305469   10776 config.cc:196]                        gRPC EXPERIMENT call_status_override_on_cancellation   OFF (default:OFF)
D0620 15:45:34.313320064   10776 config.cc:196]                        gRPC EXPERIMENT call_v3                                OFF (default:OFF)
D0620 15:45:34.313323575   10776 config.cc:196]                        gRPC EXPERIMENT canary_client_privacy                  ON  (default:ON)
D0620 15:45:34.313325940   10776 config.cc:196]                        gRPC EXPERIMENT capture_base_context                   ON  (default:ON)
D0620 15:45:34.313328504   10776 config.cc:196]                        gRPC EXPERI

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


I0000 00:00:1718898353.630670   10776 service.cc:145] XLA service 0x5a6f3b5d1ec0 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1718898353.630739   10776 service.cc:153]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1718898353.630746   10776 service.cc:153]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1718898353.630749   10776 service.cc:153]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1718898353.630752   10776 service.cc:153]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1718898353.630754   10776 service.cc:153]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1718898353.630757   10776 service.cc:153]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1718898353.630760   10776 service.cc:153]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1718898353.630762   10776 service.cc:153]   StreamExecutor device (7): TPU, 2a886c8


TPU cores available: 8


[I 2024-06-20 15:46:04,288] Using an existing study with name 'disaster_BERT_prePost_0' instead of creating a new one.
I0000 00:00:1718898367.363196   10776 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718898631.133913   11604 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(962053fca1595148:0:0), session_name()
I0000 00:00:1718898673.958794   11604 tpu_compile_op_common.cc:245] Compilation of 962053fca1595148:0:0 with session name  took 42.824833075s and succeeded
I0000 00:00:1718898674.136183   11604 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(962053fca1595148:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_133869_4462497060588828470", property.function_library_fingerprint = 11723792636183010105, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718898740.684897   11567 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a84d3003ca04dd71:0:0), session_name()
I0000 00:00:1718898746.674165   11567 tpu_compile_op_common.cc:245] Compilation of a84d3003ca04dd71:0:0 with session name  took 5.989217879s and succeeded
I0000 00:00:1718898746.717253   11567 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a84d3003ca04dd71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_170898_11776775324728099573", property.function_library_fingerprint = 4373956545788763588, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718898751.945255   11535 tpu_compile_op_common.cc:245] Compilation of 73df0a1fa3dca1ec:0:0 with session name  took 5.118334648s and succeeded
I0000 00:00:1718898751.990086   11535 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(73df0a1fa3dca1ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_170898_11776775324728099573", property.function_library_fingerprint = 4373956545788763588, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718898751.990271   11535 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7582877278327942 and accuracy: 0.8245798349380493


2024-06-20 15:53:16.551823: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718898798.246940   11527 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2c108279e65fbe9c:0:0), session_name()
I0000 00:00:1718898803.210645   11527 tpu_compile_op_common.cc:245] Compilation of 2c108279e65fbe9c:0:0 with session name  took 4.963667486s and succeeded
I0000 00:00:1718898803.238231   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2c108279e65fbe9c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_3051610315426836343", property.function_library_fingerprint = 10973388231832347023, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718898813.810673   11535 tpu_compile_op_common.cc:245] Compilation of 1ca56da862631da3:0:0 with session name  took 4.808507268s and succeeded
I0000 00:00:1718898813.856052   11535 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ca56da862631da3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_195050_12639636512716262103", property.function_library_fingerprint = 10973388231832347023, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718898813.856293   11535 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/3


I0000 00:00:1718898904.683356   11597 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec6d7d59f15f6969:0:0), session_name()
I0000 00:00:1718898974.648372   11597 tpu_compile_op_common.cc:245] Compilation of ec6d7d59f15f6969:0:0 with session name  took 1m9.964967905s and succeeded
I0000 00:00:1718898974.931786   11597 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec6d7d59f15f6969:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_230535_3480343555661740429", property.function_library_fingerprint = 808723790873268687, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718899053.458722   11593 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ac8b515286b3199:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_230535_3480343555661740429", property.function_library_fingerprint = 808723790873268687, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718899053.459423   11593 tpu_compilation_cache_interface.cc:541] After adding entry for key 1ac8b515286b3199:0:0 with session_name  cache is 9 entries (1821823799 bytes),  marked for eviction 0 entries (0 bytes).


Epoch 2/3
Epoch 3/3


[I 2024-06-20 15:57:39,299] Trial 5 finished with value: 0.8245798349380493 and parameters: {'num_epochs': 8, 'dropout_rate': 0.24334753706290796, 'weight_decay': 0.0797167213428231, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.12716685940434969}. Best is trial 1 with value: 0.8361344337463379.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718899311.722918   11521 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(441370100acc265e:0:0), session_name()
I0000 00:00:1718899352.388444   11521 tpu_compile_op_common.cc:245] Compilation of 441370100acc265e:0:0 with session name  took 40.665477619s and succeeded
I0000 00:00:1718899352.558985   11521 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(441370100acc265e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_398276_404774281260140609", property.function_library_fingerprint = 11007429340331026227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718899391.626086   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b34bfb91f8b6565:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_398276_404774281260140609", property.function_library_fingerprint = 11007429340331026227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718899391.626936   11527 tpu_compilation_cache_interface.cc:541] After adding entry for key 8b34bfb91f8b6565:0:0 with session_name  cache is 11 entries (2436575670 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718899416.20



I0000 00:00:1718899427.865802   11610 tpu_compile_op_common.cc:245] Compilation of b4ce633e14c7b27:0:0 with session name  took 5.580400206s and succeeded
I0000 00:00:1718899427.907293   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b4ce633e14c7b27:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_435325_12078423948292086414", property.function_library_fingerprint = 14235638305616434292, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718899427.907741   11610 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/3
Epoch 3/3
f1 score: 0.3541888892650604 and accuracy: 0.7079831957817078


2024-06-20 16:04:18.543946: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718899460.352810   11523 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd5551375dc5b588:0:0), session_name()
I0000 00:00:1718899465.301148   11523 tpu_compile_op_common.cc:245] Compilation of fd5551375dc5b588:0:0 with session name  took 4.948286813s and succeeded
I0000 00:00:1718899465.325456   11523 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd5551375dc5b588:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13573834009204767100", property.function_library_fingerprint = 5254313713523478809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718899476.703369   11597 tpu_compile_op_common.cc:245] Compilation of 72138b4eef19b5d6:0:0 with session name  took 5.684225173s and succeeded
I0000 00:00:1718899476.751781   11597 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(72138b4eef19b5d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_450717_8729894754827802902", property.function_library_fingerprint = 5254313713523478809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718899476.752113   11597 tpu_compilation_cache_interface.cc:541] After adding entry for 



[I 2024-06-20 16:08:42,165] Trial 10 finished with value: 0.7079831957817078 and parameters: {'num_epochs': 3, 'dropout_rate': 0.49676071674801636, 'weight_decay': 0.09387994951249515, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.005708515610518994}. Best is trial 8 with value: 0.8440126180648804.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718899973.734549   11545 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b49d5dad3cc770a3:0:0), session_name()
I0000 00:00:1718900015.124841   11545 tpu_compile_op_common.cc:245] Compilation of b49d5dad3cc770a3:0:0 with session name  took 41.390222423s and succeeded
I0000 00:00:1718900015.307725   11545 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b49d5dad3cc770a3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_652739_17563596546408617338", property.function_library_fingerprint = 14965064570703985647, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718900081.653542   11580 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(58c7a27613d05c39:0:0), session_name()
I0000 00:00:1718900088.177308   11580 tpu_compile_op_common.cc:245] Compilation of 58c7a27613d05c39:0:0 with session name  took 6.523704811s and succeeded
I0000 00:00:1718900088.233505   11580 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(58c7a27613d05c39:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_689784_2718500579854763210", property.function_library_fingerprint = 4943534012927776083, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718900093.937422   11592 tpu_compile_op_common.cc:245] Compilation of 3de391efb7ed4516:0:0 with session name  took 5.610945554s and succeeded
I0000 00:00:1718900093.986440   11592 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3de391efb7ed4516:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_689784_2718500579854763210", property.function_library_fingerprint = 4943534012927776083, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718900093.986649   11592 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7553678154945374 and accuracy: 0.8440126180648804


2024-06-20 16:15:32.212741: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718900134.030699   11605 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2224883cd98126fd:0:0), session_name()
I0000 00:00:1718900138.957879   11605 tpu_compile_op_common.cc:245] Compilation of 2224883cd98126fd:0:0 with session name  took 4.927140117s and succeeded
I0000 00:00:1718900138.981980   11605 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2224883cd98126fd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15677702566128283970", property.function_library_fingerprint = 9299666308814833821, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718900150.163732   11527 tpu_compile_op_common.cc:245] Compilation of db6e8d1301ced81e:0:0 with session name  took 5.277310217s and succeeded
I0000 00:00:1718900150.203461   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db6e8d1301ced81e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_710432_17823544652618518329", property.function_library_fingerprint = 9299666308814833821, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718900150.203702   11527 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/2


I0000 00:00:1718900243.729509   11525 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(14f19eeed3f457d6:0:0), session_name()
I0000 00:00:1718900317.544473   11525 tpu_compile_op_common.cc:245] Compilation of 14f19eeed3f457d6:0:0 with session name  took 1m13.814916619s and succeeded
I0000 00:00:1718900317.834669   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(14f19eeed3f457d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_745933_3798819892307443877", property.function_library_fingerprint = 18282813185898996993, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/2


[I 2024-06-20 16:20:02,523] Trial 16 finished with value: 0.8440126180648804 and parameters: {'num_epochs': 6, 'dropout_rate': 0.12516850688031994, 'weight_decay': 0.0506016797274356, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.986871845801423}. Best is trial 11 with value: 0.8450630307197571.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/15


I0000 00:00:1718900657.422729   11557 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eacafd5f422f6a8d:0:0), session_name()
I0000 00:00:1718900700.544000   11557 tpu_compile_op_common.cc:245] Compilation of eacafd5f422f6a8d:0:0 with session name  took 43.121222042s and succeeded
I0000 00:00:1718900700.731470   11557 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eacafd5f422f6a8d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_913218_6872501451352658002", property.function_library_fingerprint = 4574625601217869243, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718900744.706433   11588 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5f3169c61dfb5fa8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_913218_6872501451352658002", property.function_library_fingerprint = 4574625601217869243, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718900744.707179   11588 tpu_compilation_cache_interface.cc:541] After adding entry for key 5f3169c61dfb5fa8:0:0 with session_name  cache is 29 entries (6081586384 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718900769.28



I0000 00:00:1718900781.270080   11519 tpu_compile_op_common.cc:245] Compilation of 4f48376176a257f9:0:0 with session name  took 5.061191557s and succeeded
I0000 00:00:1718900781.302666   11519 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4f48376176a257f9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_950309_8481821412534646165", property.function_library_fingerprint = 12569090481728945843, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718900781.302835   11519 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
f1 score: 0.7764337658882141 and accuracy: 0.8361344337463379


2024-06-20 16:27:27.993852: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718900849.931755   11531 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1595d4ffee3af756:0:0), session_name()
I0000 00:00:1718900854.859607   11531 tpu_compile_op_common.cc:245] Compilation of 1595d4ffee3af756:0:0 with session name  took 4.927803696s and succeeded
I0000 00:00:1718900854.882628   11531 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1595d4ffee3af756:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_16395661485344860836", property.function_library_fingerprint = 27828109952394463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolo



I0000 00:00:1718900866.670521   11608 tpu_compile_op_common.cc:245] Compilation of d813e281799c137a:0:0 with session name  took 5.38362788s and succeeded
I0000 00:00:1718900866.704877   11608 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d813e281799c137a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_986725_14137236250283786535", property.function_library_fingerprint = 27828109952394463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718900866.705190   11608 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 1/5


I0000 00:00:1718900961.370583   11574 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b927a93082e60b5:0:0), session_name()
I0000 00:00:1718901031.715451   11574 tpu_compile_op_common.cc:245] Compilation of b927a93082e60b5:0:0 with session name  took 1m10.344823518s and succeeded
I0000 00:00:1718901031.946507   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b927a93082e60b5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1022268_326729954156748133", property.function_library_fingerprint = 4049699191604369960, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2024-06-20 16:31:49,746] Trial 21 finished with value: 0.8361344337463379 and parameters: {'num_epochs': 15, 'dropout_rate': 0.327875318200427, 'weight_decay': 0.062450691073599185, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.25832867801456155}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718901362.604772   11579 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(123e04d18c7316a1:0:0), session_name()
I0000 00:00:1718901404.386528   11579 tpu_compile_op_common.cc:245] Compilation of 123e04d18c7316a1:0:0 with session name  took 41.781702783s and succeeded
I0000 00:00:1718901404.599362   11579 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(123e04d18c7316a1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1191441_11171607108054535870", property.function_library_fingerprint = 12822026744684874258, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718901467.518264   11517 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a562c6320b3d75f9:0:0), session_name()
I0000 00:00:1718901473.606080   11517 tpu_compile_op_common.cc:245] Compilation of a562c6320b3d75f9:0:0 with session name  took 6.087753064s and succeeded
I0000 00:00:1718901473.645432   11517 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a562c6320b3d75f9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1228532_1598947808324455612", property.function_library_fingerprint = 15986754333565777376, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718901479.026958   11538 tpu_compile_op_common.cc:245] Compilation of 629bc8ff5349bb52:0:0 with session name  took 5.283415028s and succeeded
I0000 00:00:1718901479.073420   11538 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(629bc8ff5349bb52:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1228532_1598947808324455612", property.function_library_fingerprint = 15986754333565777376, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718901479.073669   11538 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7777224779129028 and accuracy: 0.8413865566253662


2024-06-20 16:38:46.444451: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718901528.277299   11522 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2816983009c93f49:0:0), session_name()
I0000 00:00:1718901533.293098   11522 tpu_compile_op_common.cc:245] Compilation of 2816983009c93f49:0:0 with session name  took 5.015745228s and succeeded
I0000 00:00:1718901533.321719   11522 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2816983009c93f49:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9368198206205960284", property.function_library_fingerprint = 17447259845707161083, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718901544.657127   11555 tpu_compile_op_common.cc:245] Compilation of a5120b62b4eb7fe2:0:0 with session name  took 5.460446684s and succeeded
I0000 00:00:1718901544.700610   11555 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a5120b62b4eb7fe2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1254436_15124361560905752860", property.function_library_fingerprint = 17447259845707161083, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718901544.700799   11555 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 1/3


I0000 00:00:1718901638.531459   11547 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(721769c36546f398:0:0), session_name()
I0000 00:00:1718901712.569754   11547 tpu_compile_op_common.cc:245] Compilation of 721769c36546f398:0:0 with session name  took 1m14.038236035s and succeeded
I0000 00:00:1718901712.871254   11547 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(721769c36546f398:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1289979_4360020009117286352", property.function_library_fingerprint = 15724795323703778327, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/3
Epoch 3/3


[I 2024-06-20 16:43:06,485] Trial 27 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 9, 'dropout_rate': 0.24845711902308767, 'weight_decay': 0.04610755671226927, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.3497145364500444}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
I0000 00:00:1718902038.251538   11593 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d71d51ad74126fdb:0:0), session_name()
I0000 00:00:1718902081.023142   11593 tpu_compile_op_common.cc:245] Compilation of d71d51ad74126fdb:0:0 with session name  took 42.7



I0000 00:00:1718902120.629739   11532 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ffcf7fb5940fda88:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1457924_18368571575685196918", property.function_library_fingerprint = 17589474454464419638, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718902120.630404   11532 tpu_compilation_cache_interface.cc:541] After adding entry for key ffcf7fb5940fda88:0:0 with session_name  cache is 47 entries (9727139112 bytes),  marked for eviction 5 entries (1200042624 bytes).
I0000 00:00:1



I0000 00:00:1718902156.440494   11589 tpu_compile_op_common.cc:245] Compilation of 6b8cbe3bca0d1674:0:0 with session name  took 5.455437893s and succeeded
I0000 00:00:1718902156.483301   11589 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6b8cbe3bca0d1674:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1495015_4672947215266674378", property.function_library_fingerprint = 12674783162327435041, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718902156.483497   11589 tpu_compilation_cache_interface.cc:541] After adding entr

f1 score: 0.6648235321044922 and accuracy: 0.792542040348053


2024-06-20 16:49:37.767543: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718902179.643527   11591 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(449d0b1160aa42b6:0:0), session_name()
I0000 00:00:1718902184.893132   11591 tpu_compile_op_common.cc:245] Compilation of 449d0b1160aa42b6:0:0 with session name  took 5.249564222s and succeeded
I0000 00:00:1718902184.919100   11591 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(449d0b1160aa42b6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_3347581390176622227", property.function_library_fingerprint = 8279155081326451872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718902196.765606   11544 tpu_compile_op_common.cc:245] Compilation of f225c0d516b8b797:0:0 with session name  took 5.393210702s and succeeded
I0000 00:00:1718902196.800555   11544 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f225c0d516b8b797:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1506903_11285110795382352551", property.function_library_fingerprint = 8279155081326451872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718902196.800773   11544 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_BERT_prePost_0_model_trial_10_accuracy_0.7080_avg_score_0.5311_f1_0.3542_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_10_accuracy_0.7080_avg_score_0.5311_f1_0.3542_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 16:54:02,938] Trial 35 finished with value: 0.792542040348053 and parameters: {'num_epochs': 1, 'dropout_rate': 0.15084608571921063, 'weight_decay': 0.05229340523910205, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.19367148786931254}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718902693.899879   11579 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1377493a6fa8b6d1:0:0), session_name()
I0000 00:00:1718902735.126123   11579 tpu_compile_op_common.cc:245] Compilation of 1377493a6fa8b6d1:0:0 with session name  took 41.226175214s and succeeded
I0000 00:00:1718902735.345907   11579 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1377493a6fa8b6d1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1709163_4021231661005637217", property.function_library_fingerprint = 8083553879371971263, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718902798.508858   11559 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5fc8f3e7d92c2f6c:0:0), session_name()
I0000 00:00:1718902804.255704   11559 tpu_compile_op_common.cc:245] Compilation of 5fc8f3e7d92c2f6c:0:0 with session name  took 5.746790648s and succeeded
I0000 00:00:1718902804.307999   11559 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5fc8f3e7d92c2f6c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1746254_6794865725884575473", property.function_library_fingerprint = 7928748208151259072, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718902809.730245   11529 tpu_compile_op_common.cc:245] Compilation of 2745ca67bd3fe9c9:0:0 with session name  took 5.310132841s and succeeded
I0000 00:00:1718902809.771956   11529 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2745ca67bd3fe9c9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1746254_6794865725884575473", property.function_library_fingerprint = 7928748208151259072, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718902809.772187   11529 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7662800550460815 and accuracy: 0.8335084319114685


2024-06-20 17:01:07.393206: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718902869.351992   11570 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e6c28c3216a487c4:0:0), session_name()
I0000 00:00:1718902874.283549   11570 tpu_compile_op_common.cc:245] Compilation of e6c28c3216a487c4:0:0 with session name  took 4.931500733s and succeeded
I0000 00:00:1718902874.313816   11570 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6c28c3216a487c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_6022073234533471476", property.function_library_fingerprint = 14125638390454067992, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718902885.930813   11574 tpu_compile_op_common.cc:245] Compilation of 2edbd574223df4ac:0:0 with session name  took 5.190182159s and succeeded
I0000 00:00:1718902885.979607   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2edbd574223df4ac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1777414_10779943642590682233", property.function_library_fingerprint = 14125638390454067992, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718902885.979844   11574 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 1/4


I0000 00:00:1718902981.229935   11565 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bf371f6f4d07437:0:0), session_name()
I0000 00:00:1718903056.235274   11565 tpu_compile_op_common.cc:245] Compilation of bf371f6f4d07437:0:0 with session name  took 1m15.005292464s and succeeded
I0000 00:00:1718903056.475546   11565 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf371f6f4d07437:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1812957_18317487074363794049", property.function_library_fingerprint = 7798870758356155200, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718903130.201795   11542 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2ff52c8c1ff111e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1812957_18317487074363794049", property.function_library_fingerprint = 7798870758356155200, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718903130.202709   11542 tpu_compilation_cache_interface.cc:541] After adding entry for key 2ff52c8c1ff111e3:0:0 with session_name  cache is 63 entries (12757793695 bytes),  marked for eviction 20 entries (4259011146 bytes).


Epoch 2/4
Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_BERT_prePost_0_model_trial_35_accuracy_0.7925_avg_score_0.7287_f1_0.6648_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_35_accuracy_0.7925_avg_score_0.7287_f1_0.6648_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 17:05:37,501] Trial 44 finished with value: 0.8335084319114685 and parameters: {'num_epochs': 12, 'dropout_rate': 0.35349217252770226, 'weight_decay': 0.07264894816962676, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.2330936845570345}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718903389.827668   11609 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(80c7264bade55633:0:0), session_name()
I0000 00:00:1718903431.132975   11609 tpu_compile_op_common.cc:245] Compilation of 80c7264bade55633:0:0 with session name  took 41.30526242s and succeeded
I0000 00:00:1718903431.313861   11609 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(80c7264bade55633:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1981302_14479509443513591071", property.function_library_fingerprint = 6177605963383115921, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718903474.507106   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ced8d6b5739802:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1981302_14479509443513591071", property.function_library_fingerprint = 6177605963383115921, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718903474.508154   11574 tpu_compilation_cache_interface.cc:541] After adding entry for key 1ced8d6b5739802:0:0 with session_name  cache is 65 entries (13372272764 bytes),  marked for eviction 23 entries (4844852404 bytes).
I0000 00:00:17



I0000 00:00:1718903511.596956   11552 tpu_compile_op_common.cc:245] Compilation of 7ad5cc11196a73fa:0:0 with session name  took 5.310043183s and succeeded
I0000 00:00:1718903511.643205   11552 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7ad5cc11196a73fa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2018331_8527934408798225954", property.function_library_fingerprint = 8325584396581746178, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718903511.643416   11552 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7330830693244934 and accuracy: 0.8434873819351196


2024-06-20 17:12:24.475731: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718903546.314345   11540 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6d3801f7841bfc8f:0:0), session_name()
I0000 00:00:1718903551.343128   11540 tpu_compile_op_common.cc:245] Compilation of 6d3801f7841bfc8f:0:0 with session name  took 5.028734641s and succeeded
I0000 00:00:1718903551.369041   11540 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6d3801f7841bfc8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5591761567832042939", property.function_library_fingerprint = 11698233466812290176, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718903562.993502   11596 tpu_compile_op_common.cc:245] Compilation of 54d5a0665bd6dd5b:0:0 with session name  took 5.318336951s and succeeded
I0000 00:00:1718903563.032083   11596 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(54d5a0665bd6dd5b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2035475_4854138295841552090", property.function_library_fingerprint = 11698233466812290176, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718903563.032302   11596 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_BERT_prePost_0_model_trial_5_accuracy_0.8246_avg_score_0.7914_f1_0.7583_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_5_accuracy_0.8246_avg_score_0.7914_f1_0.7583_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 17:16:41,146] Trial 52 finished with value: 0.8434873819351196 and parameters: {'num_epochs': 4, 'dropout_rate': 0.17569094387929707, 'weight_decay': 0.0399479676292048, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.13402863326444986}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718904055.478910   11611 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(245d36a2d986445e:0:0), session_name()
I0000 00:00:1718904099.479370   11611 tpu_compile_op_common.cc:245] Compilation of 245d36a2d986445e:0:0 with session name  took 44.000396261s and succeeded
I0000 00:00:1718904099.699145   11611 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(245d36a2d986445e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2237457_9019343022818527081", property.function_library_fingerprint = 17047556225281706228, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718904142.625181   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7d0f6f1506eed32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2237457_9019343022818527081", property.function_library_fingerprint = 17047556225281706228, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904142.627501   11574 tpu_compilation_cache_interface.cc:541] After adding entry for key 7d0f6f1506eed32:0:0 with session_name  cache is 74 entries (15194610813 bytes),  marked for eviction 32 entries (6667514705 bytes).
I0000 00:00:17



I0000 00:00:1718904180.240624   11590 tpu_compile_op_common.cc:245] Compilation of f0fa43b63b240b94:0:0 with session name  took 5.649036515s and succeeded
I0000 00:00:1718904180.288155   11590 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f0fa43b63b240b94:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2274502_1559488547884710151", property.function_library_fingerprint = 16553613611389073645, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904180.288378   11590 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7664282321929932 and accuracy: 0.8445377945899963


2024-06-20 17:23:40.100985: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718904222.063939   11572 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eb792c2f8eb34f49:0:0), session_name()
I0000 00:00:1718904227.318405   11572 tpu_compile_op_common.cc:245] Compilation of eb792c2f8eb34f49:0:0 with session name  took 5.254397168s and succeeded
I0000 00:00:1718904227.347305   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eb792c2f8eb34f49:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_7974272288885540131", property.function_library_fingerprint = 11126974173976787197, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718904239.295628   11523 tpu_compile_op_common.cc:245] Compilation of eca4f3eb820c95e5:0:0 with session name  took 5.567721585s and succeeded
I0000 00:00:1718904239.338409   11523 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eca4f3eb820c95e5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2295150_9270544845529306875", property.function_library_fingerprint = 11126974173976787197, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904239.338628   11523 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718904332.833589   11541 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3e0a110474d2e7d9:0:0), session_name()
I0000 00:00:1718904410.128451   11541 tpu_compile_op_common.cc:245] Compilation of 3e0a110474d2e7d9:0:0 with session name  took 1m17.294789927s and succeeded
I0000 00:00:1718904410.371739   11541 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e0a110474d2e7d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2330651_17252631915209352692", property.function_library_fingerprint = 18004088054090056952, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_44_accuracy_0.8335_avg_score_0.7999_f1_0.7663_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_44_accuracy_0.8335_avg_score_0.7999_f1_0.7663_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 17:28:11,308] Trial 61 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 6, 'dropout_rate': 0.2258231706808198, 'weight_decay': 0.014319634381160691, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.0030855075035454704}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718904756.548632   11540 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e3a07f4db9eb6460:0:0), session_name()
I0000 00:00:1718904799.997079   11540 tpu_compile_op_common.cc:245] Compilation of e3a07f4db9eb6460:0:0 with session name  took 43.448374633s and succeeded
I0000 00:00:1718904800.224443   11540 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3a07f4db9eb6460:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2497936_820089072622243310", property.function_library_fingerprint = 4334836094574232591, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718904841.035134   11534 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(49622255f5e64ec7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2497936_820089072622243310", property.function_library_fingerprint = 4334836094574232591, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904841.036131   11534 tpu_compilation_cache_interface.cc:541] After adding entry for key 49622255f5e64ec7:0:0 with session_name  cache is 83 entries (17017241907 bytes),  marked for eviction 41 entries (8490291285 bytes).
I0000 00:00:17



I0000 00:00:1718904878.440947   11583 tpu_compile_op_common.cc:245] Compilation of 850d72ad53a227cd:0:0 with session name  took 5.167312928s and succeeded
I0000 00:00:1718904878.481011   11583 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(850d72ad53a227cd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2535027_11805749623871807374", property.function_library_fingerprint = 12727707650195897699, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904878.481231   11583 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7607415914535522 and accuracy: 0.848739504814148


2024-06-20 17:35:24.547734: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718904926.548193   11531 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a78ea15074afc3ba:0:0), session_name()
I0000 00:00:1718904931.625709   11531 tpu_compile_op_common.cc:245] Compilation of a78ea15074afc3ba:0:0 with session name  took 5.077471647s and succeeded
I0000 00:00:1718904931.655494   11531 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a78ea15074afc3ba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_12972198271486274689", property.function_library_fingerprint = 16517301309831355513, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718904943.913524   11521 tpu_compile_op_common.cc:245] Compilation of 767148de8b855db5:0:0 with session name  took 5.534871791s and succeeded
I0000 00:00:1718904943.959089   11521 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(767148de8b855db5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2559179_4045296977611323453", property.function_library_fingerprint = 16517301309831355513, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718904943.959358   11521 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/3


I0000 00:00:1718905041.645202   11532 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(17295b4c6ad6d7d6:0:0), session_name()
I0000 00:00:1718905118.474388   11532 tpu_compile_op_common.cc:245] Compilation of 17295b4c6ad6d7d6:0:0 with session name  took 1m16.829135863s and succeeded
I0000 00:00:1718905118.710951   11532 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(17295b4c6ad6d7d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2594722_4345004288228425889", property.function_library_fingerprint = 13377736355156427232, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_BERT_prePost_0_model_trial_21_accuracy_0.8361_avg_score_0.8063_f1_0.7764_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_21_accuracy_0.8361_avg_score_0.8063_f1_0.7764_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 17:39:57,413] Trial 71 finished with value: 0.848739504814148 and parameters: {'num_epochs': 8, 'dropout_rate': 0.26600647404799943, 'weight_decay': 0.04875632705236682, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.27390857833911153}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718905465.451146   11525 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(16fd84ec2b3f9d0b:0:0), session_name()
I0000 00:00:1718905509.066376   11525 tpu_compile_op_common.cc:245] Compilation of 16fd84ec2b3f9d0b:0:0 with session name  took 43.615170653s and succeeded
I0000 00:00:1718905509.292732   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(16fd84ec2b3f9d0b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2762509_10217637623697963926", property.function_library_fingerprint = 11646191364029305118, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718905576.219702   11546 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c9299d567bf92159:0:0), session_name()
I0000 00:00:1718905582.549674   11546 tpu_compile_op_common.cc:245] Compilation of c9299d567bf92159:0:0 with session name  took 6.329918961s and succeeded
I0000 00:00:1718905582.600733   11546 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c9299d567bf92159:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2799554_1284005947978649529", property.function_library_fingerprint = 9084292880259013637, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718905587.952146   11527 tpu_compile_op_common.cc:245] Compilation of 6355864c82914b44:0:0 with session name  took 5.243279192s and succeeded
I0000 00:00:1718905587.990092   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6355864c82914b44:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2799554_1284005947978649529", property.function_library_fingerprint = 9084292880259013637, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718905587.990380   11527 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7759069204330444 and accuracy: 0.8356092572212219


[I 2024-06-20 17:46:59,500] Trial 80 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2661540599246535, 'weight_decay': 0.009546017180836447, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.34549983715785704}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718905880.887072   11569 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a84f8c5c292126fb:0:0), session_name()
I0000 00:00:1718905925.541268   11569 tpu_compile_op_common.cc:245] Compilation of a84f8c5c292126fb:0:0 with session name  took 44.654151861s and succeeded
I0000 00:00:1718905925.724422   11569 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a84f8c5c292126fb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2955885_5571105142784405333", property.function_library_fingerprint = 12630817677519717809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718905993.820000   11588 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(23b62f5dc5745eca:0:0), session_name()
I0000 00:00:1718906000.854260   11588 tpu_compile_op_common.cc:245] Compilation of 23b62f5dc5745eca:0:0 with session name  took 7.03418089s and succeeded
I0000 00:00:1718906000.915625   11588 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(23b62f5dc5745eca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2992930_9497403121395294863", property.function_library_fingerprint = 4724717078958940620, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718906006.806541   11572 tpu_compile_op_common.cc:245] Compilation of c357cbf02db1c1e9:0:0 with session name  took 5.772524894s and succeeded
I0000 00:00:1718906006.851922   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c357cbf02db1c1e9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2992930_9497403121395294863", property.function_library_fingerprint = 4724717078958940620, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718906006.852224   11572 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7288962602615356 and accuracy: 0.8340336084365845


[I 2024-06-20 17:53:44,882] Trial 84 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 5, 'dropout_rate': 0.3042888586387745, 'weight_decay': 0.04081093721705297, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.6962447660166491}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718906291.348651   11555 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c195a5b8d267de22:0:0), session_name()
I0000 00:00:1718906335.978082   11555 tpu_compile_op_common.cc:245] Compilation of c195a5b8d267de22:0:0 with session name  took 44.629374551s and succeeded
I0000 00:00:1718906336.212723   11555 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c195a5b8d267de22:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3140659_7114014570110836342", property.function_library_fingerprint = 12823036099544773430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718906379.445828   11550 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5ebf30df6e261dc5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3140659_7114014570110836342", property.function_library_fingerprint = 12823036099544773430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718906379.446607   11550 tpu_compilation_cache_interface.cc:541] After adding entry for key 5ebf30df6e261dc5:0:0 with session_name  cache is 100 entries (20348977691 bytes),  marked for eviction 59 entries (12135857797 bytes).
I0000 00:0



I0000 00:00:1718906418.649911   11527 tpu_compile_op_common.cc:245] Compilation of 207f9516ee0713c4:0:0 with session name  took 5.669493334s and succeeded
I0000 00:00:1718906418.698761   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(207f9516ee0713c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3177750_1593026716011929883", property.function_library_fingerprint = 9510085642671984816, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718906418.698974   11527 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7625380158424377 and accuracy: 0.8445377945899963


2024-06-20 18:01:03.425849: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718906465.472303   11521 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fb1d9edb295a5821:0:0), session_name()
I0000 00:00:1718906471.208466   11521 tpu_compile_op_common.cc:245] Compilation of fb1d9edb295a5821:0:0 with session name  took 5.736101966s and succeeded
I0000 00:00:1718906471.253666   11521 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fb1d9edb295a5821:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13377651767625653340", property.function_library_fingerprint = 13724589256300015158, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718906483.738934   11574 tpu_compile_op_common.cc:245] Compilation of b1a6f68e836441c4:0:0 with session name  took 5.847539533s and succeeded
I0000 00:00:1718906483.798895   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b1a6f68e836441c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3200150_2123471982869088985", property.function_library_fingerprint = 13724589256300015158, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718906483.799358   11574 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718906582.627879   11532 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7c081b80b669cb04:0:0), session_name()
I0000 00:00:1718906662.128791   11532 tpu_compile_op_common.cc:245] Compilation of 7c081b80b669cb04:0:0 with session name  took 1m19.500838313s and succeeded
I0000 00:00:1718906662.427516   11532 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7c081b80b669cb04:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3235693_15203725348132333559", property.function_library_fingerprint = 13702025574255102229, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_27_accuracy_0.8414_avg_score_0.8096_f1_0.7777_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_27_accuracy_0.8414_avg_score_0.8096_f1_0.7777_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 18:05:38,013] Trial 91 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 7, 'dropout_rate': 0.1547084558089405, 'weight_decay': 0.0530312265803758, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.2564680801746736}. Best is trial 20 with value: 0.8497899174690247.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718907004.940712   11525 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(64067cc09a26c68f:0:0), session_name()
I0000 00:00:1718907048.398764   11525 tpu_compile_op_common.cc:245] Compilation of 64067cc09a26c68f:0:0 with session name  took 43.457987391s and succeeded
I0000 00:00:1718907048.599569   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(64067cc09a26c68f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3402866_17928309523297759145", property.function_library_fingerprint = 11871814105499182736, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718907115.264176   11540 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1367d53ed43d50a2:0:0), session_name()
I0000 00:00:1718907121.455021   11540 tpu_compile_op_common.cc:245] Compilation of 1367d53ed43d50a2:0:0 with session name  took 6.190768693s and succeeded
I0000 00:00:1718907121.510082   11540 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1367d53ed43d50a2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3439911_6983018968851871242", property.function_library_fingerprint = 9742695531749884797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718907126.862044   11610 tpu_compile_op_common.cc:245] Compilation of 14393cb60e4da916:0:0 with session name  took 5.229290002s and succeeded
I0000 00:00:1718907126.905808   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(14393cb60e4da916:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3439911_6983018968851871242", property.function_library_fingerprint = 9742695531749884797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718907126.906095   11610 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.8205124139785767 and accuracy: 0.8350840210914612


[I 2024-06-20 18:12:45,596] Trial 100 finished with value: 0.8350840210914612 and parameters: {'num_epochs': 11, 'dropout_rate': 0.1276312849027712, 'weight_decay': 0.04327998463512947, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.6878794827893797}. Best is trial 94 with value: 0.8503151535987854.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718907431.358792   11572 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2bef5b61861ff748:0:0), session_name()
I0000 00:00:1718907473.736618   11572 tpu_compile_op_common.cc:245] Compilation of 2bef5b61861ff748:0:0 with session name  took 42.377772814s and succeeded
I0000 00:00:1718907473.926031   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2bef5b61861ff748:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3597994_4976006025232655141", property.function_library_fingerprint = 10652487100329396039, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718907541.803533   11547 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(120368edefe1c28f:0:0), session_name()
I0000 00:00:1718907548.158811   11547 tpu_compile_op_common.cc:245] Compilation of 120368edefe1c28f:0:0 with session name  took 6.355203853s and succeeded
I0000 00:00:1718907548.212630   11547 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(120368edefe1c28f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3635039_14601088963301109468", property.function_library_fingerprint = 15641651939702950945, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718907553.850292   11518 tpu_compile_op_common.cc:245] Compilation of e49bdca8ac95b619:0:0 with session name  took 5.541569227s and succeeded
I0000 00:00:1718907553.887813   11518 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e49bdca8ac95b619:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3635039_14601088963301109468", property.function_library_fingerprint = 15641651939702950945, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718907553.888014   11518 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7410678267478943 and accuracy: 0.8482142686843872


2024-06-20 18:19:50.981586: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718907593.012402   11532 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(653677880d1b5455:0:0), session_name()
I0000 00:00:1718907598.749165   11532 tpu_compile_op_common.cc:245] Compilation of 653677880d1b5455:0:0 with session name  took 5.736683112s and succeeded
I0000 00:00:1718907598.789081   11532 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(653677880d1b5455:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_2265228442577599354", property.function_library_fingerprint = 4291881508162630765, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718907610.404988   11572 tpu_compile_op_common.cc:245] Compilation of 468f2d090e33290d:0:0 with session name  took 5.596389269s and succeeded
I0000 00:00:1718907610.448479   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(468f2d090e33290d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3653935_6866688719217102787", property.function_library_fingerprint = 4291881508162630765, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718907610.448777   11572 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/2


I0000 00:00:1718907708.088589   11567 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ef8e11da88c22bc1:0:0), session_name()
I0000 00:00:1718907783.010864   11567 tpu_compile_op_common.cc:245] Compilation of ef8e11da88c22bc1:0:0 with session name  took 1m14.922199211s and succeeded
I0000 00:00:1718907783.320571   11567 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef8e11da88c22bc1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3689436_3279403227655262867", property.function_library_fingerprint = 1688939032587075963, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si



I0000 00:00:1718907857.085772   11531 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2d0fa95527dee38:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3689436_3279403227655262867", property.function_library_fingerprint = 1688939032587075963, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718907857.086999   11531 tpu_compilation_cache_interface.cc:541] After adding entry for key d2d0fa95527dee38:0:0 with session_name  cache is 120 entries (24133987908 bytes),  marked for eviction 77 entries (15780470191 bytes).


Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_52_accuracy_0.8435_avg_score_0.7883_f1_0.7331_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_52_accuracy_0.8435_avg_score_0.7883_f1_0.7331_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 18:24:21,920] Trial 108 finished with value: 0.8482142686843872 and parameters: {'num_epochs': 5, 'dropout_rate': 0.1955947621410197, 'weight_decay': 0.05998752539149736, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.8849249101561287}. Best is trial 94 with value: 0.8503151535987854.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2


I0000 00:00:1718908127.586052   11539 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f1079cd83ec4b00:0:0), session_name()
I0000 00:00:1718908170.636089   11539 tpu_compile_op_common.cc:245] Compilation of f1079cd83ec4b00:0:0 with session name  took 43.049997069s and succeeded
I0000 00:00:1718908170.861653   11539 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f1079cd83ec4b00:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3856507_5611129986348552717", property.function_library_fingerprint = 7210616605906780923, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size = 



I0000 00:00:1718908238.995382   11590 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1b8254ed822c511b:0:0), session_name()
I0000 00:00:1718908245.344421   11590 tpu_compile_op_common.cc:245] Compilation of 1b8254ed822c511b:0:0 with session name  took 6.348984908s and succeeded
I0000 00:00:1718908245.404065   11590 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1b8254ed822c511b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3893536_12935006388172436096", property.function_library_fingerprint = 11060062979864061024, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718908250.973922   11610 tpu_compile_op_common.cc:245] Compilation of 40d07d97f153c6e7:0:0 with session name  took 5.454815247s and succeeded
I0000 00:00:1718908251.026572   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(40d07d97f153c6e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3893536_12935006388172436096", property.function_library_fingerprint = 11060062979864061024, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718908251.027041   11610 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/2
f1 score: 0.6653342843055725 and accuracy: 0.8245798349380493


[I 2024-06-20 18:30:57,482] Trial 117 finished with value: 0.8245798349380493 and parameters: {'num_epochs': 2, 'dropout_rate': 0.16301049735444367, 'weight_decay': 0.0549986043875441, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.912691311998414}. Best is trial 94 with value: 0.8503151535987854.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718908520.139920   11582 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6c525f1725518dea:0:0), session_name()
I0000 00:00:1718908563.924525   11582 tpu_compile_op_common.cc:245] Compilation of 6c525f1725518dea:0:0 with session name  took 43.784524808s and succeeded
I0000 00:00:1718908564.109886   11582 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6c525f1725518dea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4036009_14221053584786374714", property.function_library_fingerprint = 9985501326600642112, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718908632.456864   11558 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8284282fc53c6c5d:0:0), session_name()
I0000 00:00:1718908638.853100   11558 tpu_compile_op_common.cc:245] Compilation of 8284282fc53c6c5d:0:0 with session name  took 6.396173861s and succeeded
I0000 00:00:1718908638.907136   11558 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8284282fc53c6c5d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4073100_15405224823185362633", property.function_library_fingerprint = 6106517964148103171, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718908644.749957   11564 tpu_compile_op_common.cc:245] Compilation of e7733c1fc2a41dd6:0:0 with session name  took 5.732090876s and succeeded
I0000 00:00:1718908644.802531   11564 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e7733c1fc2a41dd6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4073100_15405224823185362633", property.function_library_fingerprint = 6106517964148103171, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718908644.802634   11564 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7138528823852539 and accuracy: 0.8287814855575562


[I 2024-06-20 18:37:37,807] Trial 123 finished with value: 0.8287814855575562 and parameters: {'num_epochs': 4, 'dropout_rate': 0.2087738507773683, 'weight_decay': 0.06799554648742413, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.890416096728803}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718908922.256697   11548 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e957a3cc85921ec2:0:0), session_name()
I0000 00:00:1718908965.711532   11548 tpu_compile_op_common.cc:245] Compilation of e957a3cc85921ec2:0:0 with session name  took 43.454789106s and succeeded
I0000 00:00:1718908965.895923   11548 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e957a3cc85921ec2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4218919_13847069688266728113", property.function_library_fingerprint = 18236794640366659026, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718909035.577431   11545 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(86010272650d303e:0:0), session_name()
I0000 00:00:1718909042.098949   11545 tpu_compile_op_common.cc:245] Compilation of 86010272650d303e:0:0 with session name  took 6.521447147s and succeeded
I0000 00:00:1718909042.156160   11545 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(86010272650d303e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4255964_9092445581066214960", property.function_library_fingerprint = 16918237123415502430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718909048.109630   11572 tpu_compile_op_common.cc:245] Compilation of acc98734f2a250cf:0:0 with session name  took 5.840692128s and succeeded
I0000 00:00:1718909048.164402   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(acc98734f2a250cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4255964_9092445581066214960", property.function_library_fingerprint = 16918237123415502430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718909048.164728   11572 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7622071504592896 and accuracy: 0.8466386795043945


2024-06-20 18:44:48.170318: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718909090.189471   11606 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3276977d2e053808:0:0), session_name()
I0000 00:00:1718909095.334039   11606 tpu_compile_op_common.cc:245] Compilation of 3276977d2e053808:0:0 with session name  took 5.144529735s and succeeded
I0000 00:00:1718909095.369430   11606 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3276977d2e053808:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15266877691654046417", property.function_library_fingerprint = 16046026265185238199, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718909107.344194   11523 tpu_compile_op_common.cc:245] Compilation of e85917469c013731:0:0 with session name  took 5.576876543s and succeeded
I0000 00:00:1718909107.386864   11523 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e85917469c013731:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4276612_7515472487250454943", property.function_library_fingerprint = 16046026265185238199, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718909107.387091   11523 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718909203.470470   11516 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6bbaa340054aa63b:0:0), session_name()
I0000 00:00:1718909284.116530   11516 tpu_compile_op_common.cc:245] Compilation of 6bbaa340054aa63b:0:0 with session name  took 1m20.646010157s and succeeded
I0000 00:00:1718909284.380357   11516 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6bbaa340054aa63b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4312113_13977154922053252807", property.function_library_fingerprint = 15633787415693251691, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_16_accuracy_0.8440_avg_score_0.7997_f1_0.7554_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_16_accuracy_0.8440_avg_score_0.7997_f1_0.7554_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 18:49:25,258] Trial 130 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 6, 'dropout_rate': 0.20182856661863322, 'weight_decay': 0.06381862123101183, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.8373001344178315}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718909633.446867   11558 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c548a3eae916944a:0:0), session_name()
I0000 00:00:1718909676.410218   11558 tpu_compile_op_common.cc:245] Compilation of c548a3eae916944a:0:0 with session name  took 42.96328344s and succeeded
I0000 00:00:1718909676.640872   11558 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c548a3eae916944a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4479240_9555650978883702365", property.function_library_fingerprint = 15088713371299833483, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718909721.866828   11545 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c51790b2cc8f21f4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4479240_9555650978883702365", property.function_library_fingerprint = 15088713371299833483, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718909721.868682   11545 tpu_compilation_cache_interface.cc:541] After adding entry for key c51790b2cc8f21f4:0:0 with session_name  cache is 139 entries (28080164187 bytes),  marked for eviction 96 entries (19594365615 bytes).
I0000 00:0



I0000 00:00:1718909762.322342   11521 tpu_compile_op_common.cc:245] Compilation of e92ede3a8d0269cf:0:0 with session name  took 5.809419088s and succeeded
I0000 00:00:1718909762.370085   11521 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e92ede3a8d0269cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4516285_16926472012386703790", property.function_library_fingerprint = 13697609322240625158, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718909762.370499   11521 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.746450662612915 and accuracy: 0.8471638560295105


2024-06-20 18:56:37.612703: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718909799.545925   11601 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6ffc80b948c82175:0:0), session_name()
I0000 00:00:1718909804.977473   11601 tpu_compile_op_common.cc:245] Compilation of 6ffc80b948c82175:0:0 with session name  took 5.431509658s and succeeded
I0000 00:00:1718909805.006165   11601 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6ffc80b948c82175:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11379483906950201181", property.function_library_fingerprint = 12854230399100945434, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718909817.020857   11523 tpu_compile_op_common.cc:245] Compilation of 4b16d047c9fe23d0:0:0 with session name  took 5.754106963s and succeeded
I0000 00:00:1718909817.060080   11523 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4b16d047c9fe23d0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4535181_1067991319503021407", property.function_library_fingerprint = 12854230399100945434, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718909817.060293   11523 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718909915.459795   11610 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ed1ae254a9e00002:0:0), session_name()
I0000 00:00:1718909990.404670   11610 tpu_compile_op_common.cc:245] Compilation of ed1ae254a9e00002:0:0 with session name  took 1m14.94481163s and succeeded
I0000 00:00:1718909990.684235   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ed1ae254a9e00002:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4570682_4402715332634015200", property.function_library_fingerprint = 14416757421842290146, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_91_accuracy_0.8445_avg_score_0.8035_f1_0.7625_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_91_accuracy_0.8445_avg_score_0.8035_f1_0.7625_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 19:01:07,290] Trial 141 finished with value: 0.8471638560295105 and parameters: {'num_epochs': 5, 'dropout_rate': 0.2619569665138074, 'weight_decay': 0.0865121913462253, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.575684581679569}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718910334.893759   11534 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d9a1b8116bb8c8cd:0:0), session_name()
I0000 00:00:1718910377.628234   11534 tpu_compile_op_common.cc:245] Compilation of d9a1b8116bb8c8cd:0:0 with session name  took 42.734424703s and succeeded
I0000 00:00:1718910377.869418   11534 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d9a1b8116bb8c8cd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4737825_12482133810391578911", property.function_library_fingerprint = 3381703746498424852, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718910421.146312   11558 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(432d4252f255e3e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4737825_12482133810391578911", property.function_library_fingerprint = 3381703746498424852, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718910421.149091   11558 tpu_compilation_cache_interface.cc:541] After adding entry for key 432d4252f255e3e6:0:0 with session_name  cache is 148 entries (29902772639 bytes),  marked for eviction 104 entries (21341553210 bytes).
I0000 00:



I0000 00:00:1718910458.966976   11595 tpu_compile_op_common.cc:245] Compilation of 56717cdba68a057e:0:0 with session name  took 5.460717103s and succeeded
I0000 00:00:1718910459.007939   11595 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(56717cdba68a057e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4774874_9698647648319812486", property.function_library_fingerprint = 11232337756667600388, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718910459.008130   11595 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7439833283424377 and accuracy: 0.8513655662536621


2024-06-20 19:08:22.782043: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718910504.869856   11581 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(850fbd190b10b9e7:0:0), session_name()
I0000 00:00:1718910510.588531   11581 tpu_compile_op_common.cc:245] Compilation of 850fbd190b10b9e7:0:0 with session name  took 5.718632897s and succeeded
I0000 00:00:1718910510.619101   11581 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(850fbd190b10b9e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_2388588395835431825", property.function_library_fingerprint = 14336824673814424934, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718910523.912515   11606 tpu_compile_op_common.cc:245] Compilation of c83581dd82b544ba:0:0 with session name  took 5.979866994s and succeeded
I0000 00:00:1718910523.970440   11606 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c83581dd82b544ba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4797274_2473786836047163929", property.function_library_fingerprint = 14336824673814424934, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718910523.970729   11606 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718910624.529624   11525 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(90c33c45d17c466c:0:0), session_name()
I0000 00:00:1718910705.188394   11525 tpu_compile_op_common.cc:245] Compilation of 90c33c45d17c466c:0:0 with session name  took 1m20.658700487s and succeeded
I0000 00:00:1718910705.438621   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(90c33c45d17c466c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4832779_1711818599516353190", property.function_library_fingerprint = 14456613367866139992, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_61_accuracy_0.8445_avg_score_0.8055_f1_0.7664_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_61_accuracy_0.8445_avg_score_0.8055_f1_0.7664_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 19:13:11,125] Trial 153 finished with value: 0.8513655662536621 and parameters: {'num_epochs': 7, 'dropout_rate': 0.2914951961763816, 'weight_decay': 0.07713667826480163, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.31936719490985394}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718911059.754220   11583 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8b222004e50efb6e:0:0), session_name()
I0000 00:00:1718911102.356854   11583 tpu_compile_op_common.cc:245] Compilation of 8b222004e50efb6e:0:0 with session name  took 42.602545919s and succeeded
I0000 00:00:1718911102.543995   11583 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b222004e50efb6e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4999910_682862207996232727", property.function_library_fingerprint = 9952837117782467603, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718911144.136887   11554 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8a0d25d1c0449ea3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4999910_682862207996232727", property.function_library_fingerprint = 9952837117782467603, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718911144.137838   11554 tpu_compilation_cache_interface.cc:541] After adding entry for key 8a0d25d1c0449ea3:0:0 with session_name  cache is 157 entries (31725381363 bytes),  marked for eviction 116 entries (23512084009 bytes).
I0000 00:00



I0000 00:00:1718911182.233228   11527 tpu_compile_op_common.cc:245] Compilation of d2c18008d5581ffd:0:0 with session name  took 5.325438941s and succeeded
I0000 00:00:1718911182.281208   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2c18008d5581ffd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5036955_8301092437569122593", property.function_library_fingerprint = 1577628335227340112, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718911182.281701   11527 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7900002598762512 and accuracy: 0.8298319578170776


[I 2024-06-20 19:20:09,925] Trial 164 finished with value: 0.8298319578170776 and parameters: {'num_epochs': 8, 'dropout_rate': 0.15779565212416657, 'weight_decay': 0.07586384351797155, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.26219905493310736}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718911476.893085   11611 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e5aa437f341699f6:0:0), session_name()
I0000 00:00:1718911521.398664   11611 tpu_compile_op_common.cc:245] Compilation of e5aa437f341699f6:0:0 with session name  took 44.505486865s and succeeded
I0000 00:00:1718911521.645164   11611 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e5aa437f341699f6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5189782_15987129747092401814", property.function_library_fingerprint = 16953611117531748716, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718911566.894827   11569 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(27ab507e96bc59d8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5189782_15987129747092401814", property.function_library_fingerprint = 16953611117531748716, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718911566.896086   11569 tpu_compilation_cache_interface.cc:541] After adding entry for key 27ab507e96bc59d8:0:0 with session_name  cache is 161 entries (32479842001 bytes),  marked for eviction 117 entries (23918608856 bytes).
I0000 00



I0000 00:00:1718911605.427380   11557 tpu_compile_op_common.cc:245] Compilation of fb67cab8ac19ffe2:0:0 with session name  took 5.570613457s and succeeded
I0000 00:00:1718911605.470290   11557 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fb67cab8ac19ffe2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5226827_16191380764928846574", property.function_library_fingerprint = 12797572723933740467, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718911605.470543   11557 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7767883539199829 and accuracy: 0.8140756487846375


[I 2024-06-20 19:27:04,745] Trial 170 finished with value: 0.8140756487846375 and parameters: {'num_epochs': 6, 'dropout_rate': 0.196598492278417, 'weight_decay': 0.054046966861163365, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.43001822106477455}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718911889.133166   11585 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5a43c7f49d6176f2:0:0), session_name()
I0000 00:00:1718911936.735763   11585 tpu_compile_op_common.cc:245] Compilation of 5a43c7f49d6176f2:0:0 with session name  took 47.602546323s and succeeded
I0000 00:00:1718911936.963718   11585 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5a43c7f49d6176f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5376308_3484514607192823015", property.function_library_fingerprint = 5920364943026743339, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718912005.356601   11585 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ae068085c4d2797a:0:0), session_name()
I0000 00:00:1718912011.515256   11585 tpu_compile_op_common.cc:245] Compilation of ae068085c4d2797a:0:0 with session name  took 6.158599077s and succeeded
I0000 00:00:1718912011.561471   11585 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ae068085c4d2797a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5413399_13729824635263566942", property.function_library_fingerprint = 2805840677094571463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718912017.133608   11578 tpu_compile_op_common.cc:245] Compilation of 62e31dc366f514cb:0:0 with session name  took 5.443383803s and succeeded
I0000 00:00:1718912017.186255   11578 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(62e31dc366f514cb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5413399_13729824635263566942", property.function_library_fingerprint = 2805840677094571463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718912017.186518   11578 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7744970321655273 and accuracy: 0.8361344337463379


[I 2024-06-20 19:34:15,461] Trial 177 finished with value: 0.8361344337463379 and parameters: {'num_epochs': 12, 'dropout_rate': 0.3218178155411414, 'weight_decay': 0.031231164190658023, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.7972134095900004}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718912319.311343   11551 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e50a9aa3135df859:0:0), session_name()
I0000 00:00:1718912363.082506   11551 tpu_compile_op_common.cc:245] Compilation of e50a9aa3135df859:0:0 with session name  took 43.771107299s and succeeded
I0000 00:00:1718912363.296162   11551 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e50a9aa3135df859:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5573234_15149462372123461480", property.function_library_fingerprint = 2357625115702403218, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718912405.608558   11605 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(496058542eb9a068:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5573234_15149462372123461480", property.function_library_fingerprint = 2357625115702403218, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718912405.609326   11605 tpu_compilation_cache_interface.cc:541] After adding entry for key 496058542eb9a068:0:0 with session_name  cache is 169 entries (33988915035 bytes),  marked for eviction 126 entries (25503109413 bytes).
I0000 00:



I0000 00:00:1718912443.524043   11535 tpu_compile_op_common.cc:245] Compilation of 517a58bed1310409:0:0 with session name  took 5.521147833s and succeeded
I0000 00:00:1718912443.573377   11535 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(517a58bed1310409:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5610279_6601468295302094992", property.function_library_fingerprint = 733464223498294520, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718912443.573630   11535 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7386348843574524 and accuracy: 0.8424369692802429


[I 2024-06-20 19:40:58,382] Trial 183 finished with value: 0.8424369692802429 and parameters: {'num_epochs': 4, 'dropout_rate': 0.18420559921260762, 'weight_decay': 0.034232609317314325, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.5715600637741876}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718912723.049866   11528 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(367a9df8bf6688a7:0:0), session_name()
I0000 00:00:1718912767.310711   11528 tpu_compile_op_common.cc:245] Compilation of 367a9df8bf6688a7:0:0 with session name  took 44.260773044s and succeeded
I0000 00:00:1718912767.541206   11528 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(367a9df8bf6688a7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5756098_15681502172311232738", property.function_library_fingerprint = 17920762996487102269, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718912834.812488   11537 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2915a8e45fa30738:0:0), session_name()
I0000 00:00:1718912841.274641   11537 tpu_compile_op_common.cc:245] Compilation of 2915a8e45fa30738:0:0 with session name  took 6.462111882s and succeeded
I0000 00:00:1718912841.329941   11537 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2915a8e45fa30738:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5793143_14326753070496983914", property.function_library_fingerprint = 12919636321270083537, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718912846.827237   11542 tpu_compile_op_common.cc:245] Compilation of c6560ca9c583a019:0:0 with session name  took 5.392789787s and succeeded
I0000 00:00:1718912846.872445   11542 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c6560ca9c583a019:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5793143_14326753070496983914", property.function_library_fingerprint = 12919636321270083537, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718912846.872725   11542 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7634527683258057 and accuracy: 0.8482142686843872


2024-06-20 19:48:10.716041: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718912892.717079   11592 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3d5e1df53aae88d5:0:0), session_name()
I0000 00:00:1718912898.195635   11592 tpu_compile_op_common.cc:245] Compilation of 3d5e1df53aae88d5:0:0 with session name  took 5.478492728s and succeeded
I0000 00:00:1718912898.223861   11592 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3d5e1df53aae88d5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5139652534869716162", property.function_library_fingerprint = 12521358814957698267, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718912910.258209   11546 tpu_compile_op_common.cc:245] Compilation of cec20d1181fa4274:0:0 with session name  took 5.670735875s and succeeded
I0000 00:00:1718912910.298470   11546 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cec20d1181fa4274:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5815543_9933222137547241612", property.function_library_fingerprint = 12521358814957698267, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718912910.298729   11546 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/2


I0000 00:00:1718913007.193772   11559 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(96392ebee5131987:0:0), session_name()
I0000 00:00:1718913083.680239   11559 tpu_compile_op_common.cc:245] Compilation of 96392ebee5131987:0:0 with session name  took 1m16.486389465s and succeeded
I0000 00:00:1718913083.920215   11559 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(96392ebee5131987:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5851044_6967168340901494066", property.function_library_fingerprint = 6193055983567445874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_130_accuracy_0.8466_avg_score_0.8044_f1_0.7622_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_130_accuracy_0.8466_avg_score_0.8044_f1_0.7622_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 19:52:45,390] Trial 189 finished with value: 0.8482142686843872 and parameters: {'num_epochs': 7, 'dropout_rate': 0.21797604171992513, 'weight_decay': 0.0366387571280811, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.5974096296109452}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718913435.976170   11601 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c5180ca39f7d291c:0:0), session_name()
I0000 00:00:1718913477.627202   11601 tpu_compile_op_common.cc:245] Compilation of c5180ca39f7d291c:0:0 with session name  took 41.650984319s and succeeded
I0000 00:00:1718913477.812702   11601 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c5180ca39f7d291c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6018187_4900600502756607114", property.function_library_fingerprint = 18318551113141530186, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718913520.923005   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e698011082c3044:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6018187_4900600502756607114", property.function_library_fingerprint = 18318551113141530186, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718913520.925042   11525 tpu_compilation_cache_interface.cc:541] After adding entry for key 4e698011082c3044:0:0 with session_name  cache is 182 entries (36565984997 bytes),  marked for eviction 139 entries (28080164187 bytes).
I0000 00:



I0000 00:00:1718913559.559735   11565 tpu_compile_op_common.cc:245] Compilation of 420a8c38ec01f3e8:0:0 with session name  took 5.796729877s and succeeded
I0000 00:00:1718913559.603045   11565 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(420a8c38ec01f3e8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6055236_701470027785615487", property.function_library_fingerprint = 4606877940860311672, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718913559.603297   11565 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7540048956871033 and accuracy: 0.8466386795043945


[I 2024-06-20 19:59:38,743] Trial 198 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 6, 'dropout_rate': 0.22782592961462078, 'weight_decay': 0.03725534306046789, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.29454385157512375}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718913850.537009   11607 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(205d8debedeb216a:0:0), session_name()
I0000 00:00:1718913898.370314   11607 tpu_compile_op_common.cc:245] Compilation of 205d8debedeb216a:0:0 with session name  took 47.833259684s and succeeded
I0000 00:00:1718913898.580201   11607 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(205d8debedeb216a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6204559_168703682568202374", property.function_library_fingerprint = 13304320069791639732, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718913964.856348   11556 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dcc0cee0e547b211:0:0), session_name()
I0000 00:00:1718913970.989482   11556 tpu_compile_op_common.cc:245] Compilation of dcc0cee0e547b211:0:0 with session name  took 6.133082223s and succeeded
I0000 00:00:1718913971.031676   11556 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dcc0cee0e547b211:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6241604_9712530989376820070", property.function_library_fingerprint = 1713150629842756425, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718913976.766811   11517 tpu_compile_op_common.cc:245] Compilation of de211059193f73bf:0:0 with session name  took 5.629010443s and succeeded
I0000 00:00:1718913976.816940   11517 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(de211059193f73bf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6241604_9712530989376820070", property.function_library_fingerprint = 1713150629842756425, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718913976.817174   11517 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7683572173118591 and accuracy: 0.8361344337463379


[I 2024-06-20 20:06:39,169] Trial 204 finished with value: 0.8361344337463379 and parameters: {'num_epochs': 7, 'dropout_rate': 0.21629143923638078, 'weight_decay': 0.04110715140273293, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.5957502431266442}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718914267.476660   11547 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7f052cfde88e93c0:0:0), session_name()
I0000 00:00:1718914313.160626   11547 tpu_compile_op_common.cc:245] Compilation of 7f052cfde88e93c0:0:0 with session name  took 45.683899115s and succeeded
I0000 00:00:1718914313.387276   11547 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7f052cfde88e93c0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6392695_17778419657013466422", property.function_library_fingerprint = 14079527190002809890, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718914381.574720   11606 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd8af4971516744e:0:0), session_name()
I0000 00:00:1718914387.855854   11606 tpu_compile_op_common.cc:245] Compilation of fd8af4971516744e:0:0 with session name  took 6.281064454s and succeeded
I0000 00:00:1718914387.898123   11606 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd8af4971516744e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6429744_12518742281406507868", property.function_library_fingerprint = 3761603951593139963, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718914393.546996   11569 tpu_compile_op_common.cc:245] Compilation of 411b991dccd78472:0:0 with session name  took 5.523379168s and succeeded
I0000 00:00:1718914393.593211   11569 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(411b991dccd78472:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6429744_12518742281406507868", property.function_library_fingerprint = 3761603951593139963, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718914393.593502   11569 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.789718508720398 and accuracy: 0.8308823704719543


[I 2024-06-20 20:13:42,139] Trial 211 finished with value: 0.8308823704719543 and parameters: {'num_epochs': 9, 'dropout_rate': 0.20593323170964778, 'weight_decay': 0.038911630887316334, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.391072137500411}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718914694.642551   11567 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b84aebb7dae71721:0:0), session_name()
I0000 00:00:1718914739.650575   11567 tpu_compile_op_common.cc:245] Compilation of b84aebb7dae71721:0:0 with session name  took 45.007946949s and succeeded
I0000 00:00:1718914739.888497   11567 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b84aebb7dae71721:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6584323_16874919314242093008", property.function_library_fingerprint = 5712116409976667801, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718914809.789505   11596 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2a3b202f403e74fe:0:0), session_name()
I0000 00:00:1718914816.596396   11596 tpu_compile_op_common.cc:245] Compilation of 2a3b202f403e74fe:0:0 with session name  took 6.806840331s and succeeded
I0000 00:00:1718914816.648955   11596 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a3b202f403e74fe:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6621368_15693110116077122544", property.function_library_fingerprint = 17598622037394924463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718914822.522771   11564 tpu_compile_op_common.cc:245] Compilation of f5d00a5cc7310f59:0:0 with session name  took 5.760362923s and succeeded
I0000 00:00:1718914822.565729   11564 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f5d00a5cc7310f59:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6621368_15693110116077122544", property.function_library_fingerprint = 17598622037394924463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718914822.566008   11564 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7655985355377197 and accuracy: 0.8445377945899963


[I 2024-06-20 20:20:51,462] Trial 217 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 8, 'dropout_rate': 0.22992614212797147, 'weight_decay': 0.0338699716407292, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.34801169837783474}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718915128.161930   11577 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(16155000ed4aaf93:0:0), session_name()
I0000 00:00:1718915174.169167   11577 tpu_compile_op_common.cc:245] Compilation of 16155000ed4aaf93:0:0 with session name  took 46.007169833s and succeeded
I0000 00:00:1718915174.386815   11577 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(16155000ed4aaf93:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6774211_16782239160653414277", property.function_library_fingerprint = 11706548608732645650, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718915245.750883   11553 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ab896bcff0ce0e71:0:0), session_name()
I0000 00:00:1718915252.512264   11553 tpu_compile_op_common.cc:245] Compilation of ab896bcff0ce0e71:0:0 with session name  took 6.76133664s and succeeded
I0000 00:00:1718915252.566867   11553 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ab896bcff0ce0e71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6811260_6389352507480870830", property.function_library_fingerprint = 10906593016221079025, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718915259.073991   11521 tpu_compile_op_common.cc:245] Compilation of ac3a7f16639998b6:0:0 with session name  took 6.405019427s and succeeded
I0000 00:00:1718915259.119574   11521 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ac3a7f16639998b6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6811260_6389352507480870830", property.function_library_fingerprint = 10906593016221079025, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718915259.119944   11521 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7713284492492676 and accuracy: 0.8408613204956055


[I 2024-06-20 20:28:11,463] Trial 223 finished with value: 0.8408613204956055 and parameters: {'num_epochs': 9, 'dropout_rate': 0.24450900680129983, 'weight_decay': 0.029688164360891996, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.36703901716489984}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718915560.820837   11572 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec9776442501fe79:0:0), session_name()
I0000 00:00:1718915607.474010   11572 tpu_compile_op_common.cc:245] Compilation of ec9776442501fe79:0:0 with session name  took 46.653106773s and succeeded
I0000 00:00:1718915607.709771   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec9776442501fe79:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6965997_5255165968580955929", property.function_library_fingerprint = 14758671800098458809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718915650.185934   11518 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9a0220e1bd189ebe:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6965997_5255165968580955929", property.function_library_fingerprint = 14758671800098458809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718915650.187046   11518 tpu_compilation_cache_interface.cc:541] After adding entry for key 9a0220e1bd189ebe:0:0 with session_name  cache is 202 entries (40338481681 bytes),  marked for eviction 158 entries (31804986717 bytes).
I0000 00:



I0000 00:00:1718915690.397462   11563 tpu_compile_op_common.cc:245] Compilation of db9855c3fe703017:0:0 with session name  took 5.793950104s and succeeded
I0000 00:00:1718915690.439125   11563 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db9855c3fe703017:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7003088_11134067062234566913", property.function_library_fingerprint = 16041474825620188085, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718915690.439410   11563 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7452136278152466 and accuracy: 0.8308823704719543


[I 2024-06-20 20:35:12,760] Trial 229 finished with value: 0.8308823704719543 and parameters: {'num_epochs': 7, 'dropout_rate': 0.21187339481953488, 'weight_decay': 0.03466444557954448, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.17368101360566773}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718915979.899401   11559 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dd012d6bf1e0bb8d:0:0), session_name()
I0000 00:00:1718916027.822583   11559 tpu_compile_op_common.cc:245] Compilation of dd012d6bf1e0bb8d:0:0 with session name  took 47.92309347s and succeeded
I0000 00:00:1718916028.027019   11559 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd012d6bf1e0bb8d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7154163_16212178614717475886", property.function_library_fingerprint = 10985366212739219774, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718916070.055237   11548 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3aa134a59382558e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7154163_16212178614717475886", property.function_library_fingerprint = 10985366212739219774, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718916070.056638   11548 tpu_compilation_cache_interface.cc:541] After adding entry for key 3aa134a59382558e:0:0 with session_name  cache is 206 entries (41092945519 bytes),  marked for eviction 162 entries (32559447355 bytes).
I0000 00



I0000 00:00:1718916110.421533   11550 tpu_compile_op_common.cc:245] Compilation of 7b286782777e88b9:0:0 with session name  took 6.146540611s and succeeded
I0000 00:00:1718916110.475270   11550 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7b286782777e88b9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7191208_7247231034009398528", property.function_library_fingerprint = 6532270853849966021, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718916110.475593   11550 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7712395787239075 and accuracy: 0.8314075469970703


[I 2024-06-20 20:42:09,803] Trial 234 finished with value: 0.8314075469970703 and parameters: {'num_epochs': 6, 'dropout_rate': 0.21765461134638167, 'weight_decay': 0.056484144456895334, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.24311526377922602}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718916394.754229   11545 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(98d0d988768f084f:0:0), session_name()
I0000 00:00:1718916445.100082   11545 tpu_compile_op_common.cc:245] Compilation of 98d0d988768f084f:0:0 with session name  took 50.345748684s and succeeded
I0000 00:00:1718916445.336194   11545 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(98d0d988768f084f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7340547_2335755165134318539", property.function_library_fingerprint = 2795985845385369998, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718916487.484383   11523 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e110891b98865ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7340547_2335755165134318539", property.function_library_fingerprint = 2795985845385369998, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718916487.485455   11523 tpu_compilation_cache_interface.cc:541] After adding entry for key 3e110891b98865ea:0:0 with session_name  cache is 210 entries (41847423305 bytes),  marked for eviction 166 entries (33314059111 bytes).
I0000 00:0



I0000 00:00:1718916526.780375   11580 tpu_compile_op_common.cc:245] Compilation of eed952a76b51ded8:0:0 with session name  took 6.04115541s and succeeded
I0000 00:00:1718916526.840268   11580 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eed952a76b51ded8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7377596_17430791991083304199", property.function_library_fingerprint = 8138375794588171154, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718916526.840671   11580 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7992020845413208 and accuracy: 0.8350840210914612


[I 2024-06-20 20:49:18,868] Trial 241 finished with value: 0.8350840210914612 and parameters: {'num_epochs': 10, 'dropout_rate': 0.1359854893068743, 'weight_decay': 0.03126478281675717, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.330001351266804}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718916821.301188   11599 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(22ee34daf8787974:0:0), session_name()
I0000 00:00:1718916865.419893   11599 tpu_compile_op_common.cc:245] Compilation of 22ee34daf8787974:0:0 with session name  took 44.118657223s and succeeded
I0000 00:00:1718916865.650661   11599 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22ee34daf8787974:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7533943_17094423122186206467", property.function_library_fingerprint = 12710581894948361200, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718916932.416062   11518 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39d074e93bcab939:0:0), session_name()
I0000 00:00:1718916938.686763   11518 tpu_compile_op_common.cc:245] Compilation of 39d074e93bcab939:0:0 with session name  took 6.270637035s and succeeded
I0000 00:00:1718916938.743326   11518 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39d074e93bcab939:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7570992_12258109513440606534", property.function_library_fingerprint = 17139340030044766101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718916944.315083   11556 tpu_compile_op_common.cc:245] Compilation of 6ae45e6e0c34c68a:0:0 with session name  took 5.455136246s and succeeded
I0000 00:00:1718916944.369844   11556 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6ae45e6e0c34c68a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7570992_12258109513440606534", property.function_library_fingerprint = 17139340030044766101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718916944.370138   11556 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7889160513877869 and accuracy: 0.841911792755127


[I 2024-06-20 20:56:15,149] Trial 248 finished with value: 0.841911792755127 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22557865207784453, 'weight_decay': 0.06014994094475201, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7580042643057769}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718917244.357602   11581 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5c3fd21811f0acd5:0:0), session_name()
I0000 00:00:1718917287.718228   11581 tpu_compile_op_common.cc:245] Compilation of 5c3fd21811f0acd5:0:0 with session name  took 43.360579913s and succeeded
I0000 00:00:1718917287.950331   11581 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c3fd21811f0acd5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7725571_7177983755070536670", property.function_library_fingerprint = 15080765702816710483, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718917356.867932   11581 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(196e9a6842435f15:0:0), session_name()
I0000 00:00:1718917363.729877   11581 tpu_compile_op_common.cc:245] Compilation of 196e9a6842435f15:0:0 with session name  took 6.861885192s and succeeded
I0000 00:00:1718917363.780309   11581 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(196e9a6842435f15:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7762616_5197313661460798046", property.function_library_fingerprint = 1566871832293291094, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718917369.822685   11527 tpu_compile_op_common.cc:245] Compilation of 9e53d375e08ff402:0:0 with session name  took 5.923379834s and succeeded
I0000 00:00:1718917369.871761   11527 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9e53d375e08ff402:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7762616_5197313661460798046", property.function_library_fingerprint = 1566871832293291094, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718917369.871991   11527 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7900239825248718 and accuracy: 0.8429622054100037


[I 2024-06-20 21:03:15,264] Trial 255 finished with value: 0.8429622054100037 and parameters: {'num_epochs': 8, 'dropout_rate': 0.1476485920972759, 'weight_decay': 0.023068960453495015, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.2945245440825317}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718917657.480072   11554 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e8bfe63bd9a16bfc:0:0), session_name()
I0000 00:00:1718917703.711635   11554 tpu_compile_op_common.cc:245] Compilation of e8bfe63bd9a16bfc:0:0 with session name  took 46.23149058s and succeeded
I0000 00:00:1718917703.945459   11554 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e8bfe63bd9a16bfc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7915459_4977702524430520356", property.function_library_fingerprint = 2787599713045968605, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718917746.330229   11573 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dc8714bdd4ee7110:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7915459_4977702524430520356", property.function_library_fingerprint = 2787599713045968605, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718917746.330869   11573 tpu_compilation_cache_interface.cc:541] After adding entry for key dc8714bdd4ee7110:0:0 with session_name  cache is 222 entries (44110843355 bytes),  marked for eviction 177 entries (35735798954 bytes).
I0000 00:0



I0000 00:00:1718917785.513172   11556 tpu_compile_op_common.cc:245] Compilation of 2372c87c093fdbac:0:0 with session name  took 6.0176877s and succeeded
I0000 00:00:1718917785.563379   11556 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2372c87c093fdbac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7952508_12473236078623852348", property.function_library_fingerprint = 5563351747708447557, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718917785.563610   11556 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7668619751930237 and accuracy: 0.8503151535987854


2024-06-20 21:10:32.174690: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718917834.081755   11563 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48d239037ea3e9d5:0:0), session_name()
I0000 00:00:1718917839.441388   11563 tpu_compile_op_common.cc:245] Compilation of 48d239037ea3e9d5:0:0 with session name  took 5.359562932s and succeeded
I0000 00:00:1718917839.478714   11563 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48d239037ea3e9d5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14412464380944848968", property.function_library_fingerprint = 554177743758527824, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718917852.225205   11575 tpu_compile_op_common.cc:245] Compilation of d0bb490a015e917d:0:0 with session name  took 6.123336491s and succeeded
I0000 00:00:1718917852.269503   11575 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d0bb490a015e917d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7976660_3224255248283430208", property.function_library_fingerprint = 554177743758527824, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718917852.269743   11575 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 1/3


I0000 00:00:1718917948.741846   11548 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f9f366852de9d46c:0:0), session_name()
I0000 00:00:1718918026.326644   11548 tpu_compile_op_common.cc:245] Compilation of f9f366852de9d46c:0:0 with session name  took 1m17.584721148s and succeeded
I0000 00:00:1718918026.612117   11548 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f9f366852de9d46c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8012165_12771159249374131312", property.function_library_fingerprint = 15441050022020804035, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_BERT_prePost_0_model_trial_141_accuracy_0.8472_avg_score_0.7968_f1_0.7465_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_141_accuracy_0.8472_avg_score_0.7968_f1_0.7465_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 21:15:11,113] Trial 262 finished with value: 0.8503151535987854 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2810967957308949, 'weight_decay': 0.034883217407747115, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.27304658558719946}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718918373.060762   11577 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6ca60502f2a012bd:0:0), session_name()
I0000 00:00:1718918418.455406   11577 tpu_compile_op_common.cc:245] Compilation of 6ca60502f2a012bd:0:0 with session name  took 45.394574317s and succeeded
I0000 00:00:1718918418.646050   11577 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6ca60502f2a012bd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8179854_11548771933873108617", property.function_library_fingerprint = 4739553340964128178, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718918486.319226   11557 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7874951e9c90daf2:0:0), session_name()
I0000 00:00:1718918493.539696   11557 tpu_compile_op_common.cc:245] Compilation of 7874951e9c90daf2:0:0 with session name  took 7.220386564s and succeeded
I0000 00:00:1718918493.590262   11557 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7874951e9c90daf2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8216883_16335071243607680964", property.function_library_fingerprint = 15628896887433385492, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718918499.661676   11603 tpu_compile_op_common.cc:245] Compilation of bdd36570cd455db7:0:0 with session name  took 5.938497712s and succeeded
I0000 00:00:1718918499.712664   11603 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bdd36570cd455db7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8216883_16335071243607680964", property.function_library_fingerprint = 15628896887433385492, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718918499.712935   11603 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7287938594818115 and accuracy: 0.8466386795043945


[I 2024-06-20 21:21:54,294] Trial 271 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 4, 'dropout_rate': 0.20699128245302825, 'weight_decay': 0.04306935538810189, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.2724115961205451}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718918782.651901   11553 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4787e18c9038fc61:0:0), session_name()
I0000 00:00:1718918828.573357   11553 tpu_compile_op_common.cc:245] Compilation of 4787e18c9038fc61:0:0 with session name  took 45.921407694s and succeeded
I0000 00:00:1718918828.770110   11553 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4787e18c9038fc61:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8362702_3099197057910744763", property.function_library_fingerprint = 15538586653312201430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718918873.170731   11528 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2366eae0781ae9ac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8362702_3099197057910744763", property.function_library_fingerprint = 15538586653312201430, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718918873.172167   11528 tpu_compilation_cache_interface.cc:541] After adding entry for key 2366eae0781ae9ac:0:0 with session_name  cache is 235 entries (46687641959 bytes),  marked for eviction 191 entries (38154529767 bytes).
I0000 00:



I0000 00:00:1718918914.235375   11556 tpu_compile_op_common.cc:245] Compilation of d22b282efe8159d3:0:0 with session name  took 6.26580762s and succeeded
I0000 00:00:1718918914.289042   11556 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d22b282efe8159d3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8399747_2363298885068534284", property.function_library_fingerprint = 4642711585949501312, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718918914.289574   11556 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7857315540313721 and accuracy: 0.8377100825309753


[I 2024-06-20 21:29:01,838] Trial 278 finished with value: 0.8377100825309753 and parameters: {'num_epochs': 8, 'dropout_rate': 0.19449231232077502, 'weight_decay': 0.05868886833017143, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.8595441427011197}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718919205.782583   11607 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a41f364f54704abc:0:0), session_name()
I0000 00:00:1718919253.879438   11607 tpu_compile_op_common.cc:245] Compilation of a41f364f54704abc:0:0 with session name  took 48.096777866s and succeeded
I0000 00:00:1718919254.123298   11607 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a41f364f54704abc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8552518_6178361349147948431", property.function_library_fingerprint = 11649308351631514156, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718919298.196528   11524 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(93a5841548b42f58:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8552518_6178361349147948431", property.function_library_fingerprint = 11649308351631514156, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718919298.197972   11524 tpu_compilation_cache_interface.cc:541] After adding entry for key 93a5841548b42f58:0:0 with session_name  cache is 239 entries (47441824015 bytes),  marked for eviction 195 entries (38908994245 bytes).
I0000 00:



I0000 00:00:1718919336.930584   11534 tpu_compile_op_common.cc:245] Compilation of a781c1c106e4813f:0:0 with session name  took 5.416531385s and succeeded
I0000 00:00:1718919336.977939   11534 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a781c1c106e4813f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8589547_12771110009932484497", property.function_library_fingerprint = 12932138339844395523, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718919336.978219   11534 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.78741854429245 and accuracy: 0.8434873819351196


[I 2024-06-20 21:36:07,391] Trial 285 finished with value: 0.8434873819351196 and parameters: {'num_epochs': 9, 'dropout_rate': 0.18149342138208582, 'weight_decay': 0.046968011813061615, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.12436062027822915}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718919634.154167   11610 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9fd9758ea9c72d11:0:0), session_name()
I0000 00:00:1718919679.184026   11610 tpu_compile_op_common.cc:245] Compilation of 9fd9758ea9c72d11:0:0 with session name  took 45.029781266s and succeeded
I0000 00:00:1718919679.394278   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9fd9758ea9c72d11:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8744070_12753249651175822902", property.function_library_fingerprint = 10306899866488752797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718919722.318879   11604 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7ee6dac7816d34fd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8744070_12753249651175822902", property.function_library_fingerprint = 10306899866488752797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718919722.320410   11604 tpu_compilation_cache_interface.cc:541] After adding entry for key 7ee6dac7816d34fd:0:0 with session_name  cache is 243 entries (48196005799 bytes),  marked for eviction 199 entries (39663472079 bytes).
I0000 00



I0000 00:00:1718919761.551278   11599 tpu_compile_op_common.cc:245] Compilation of c09032f48d11c158:0:0 with session name  took 6.190676728s and succeeded
I0000 00:00:1718919761.596923   11599 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c09032f48d11c158:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8781099_10505242125347711646", property.function_library_fingerprint = 12581804882500485543, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718919761.597215   11599 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7811617851257324 and accuracy: 0.8240545988082886


[I 2024-06-20 21:43:03,684] Trial 291 finished with value: 0.8240545988082886 and parameters: {'num_epochs': 7, 'dropout_rate': 0.19956359550093564, 'weight_decay': 0.041901538121590275, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.4106036291192102}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718920045.384032   11538 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e926befbb1757eb3:0:0), session_name()
I0000 00:00:1718920091.753083   11538 tpu_compile_op_common.cc:245] Compilation of e926befbb1757eb3:0:0 with session name  took 46.368976755s and succeeded
I0000 00:00:1718920091.976715   11538 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e926befbb1757eb3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8932190_6688857631699413705", property.function_library_fingerprint = 8892258212453857593, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718920163.992815   11566 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(392e3111d76610bb:0:0), session_name()
I0000 00:00:1718920171.248823   11566 tpu_compile_op_common.cc:245] Compilation of 392e3111d76610bb:0:0 with session name  took 7.255947365s and succeeded
I0000 00:00:1718920171.313011   11566 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(392e3111d76610bb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8969239_4537127688260359703", property.function_library_fingerprint = 10854716899123884116, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718920177.630771   11610 tpu_compile_op_common.cc:245] Compilation of 75de11fadedfbd99:0:0 with session name  took 6.179294436s and succeeded
I0000 00:00:1718920177.678555   11610 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75de11fadedfbd99:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8969239_4537127688260359703", property.function_library_fingerprint = 10854716899123884116, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718920177.678884   11610 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7687178254127502 and accuracy: 0.8329831957817078


[I 2024-06-20 21:49:56,799] Trial 297 finished with value: 0.8329831957817078 and parameters: {'num_epochs': 6, 'dropout_rate': 0.21465708977275194, 'weight_decay': 0.01974272128133784, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7786990033162704}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718920471.631606   11603 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f7577a618a294dca:0:0), session_name()
I0000 00:00:1718920519.703493   11603 tpu_compile_op_common.cc:245] Compilation of f7577a618a294dca:0:0 with session name  took 48.0718106s and succeeded
I0000 00:00:1718920519.933574   11603 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f7577a618a294dca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9118720_14419073885877302682", property.function_library_fingerprint = 3813812710040577091, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718920563.480598   11580 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3aa4aae2fe946fd1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9118720_14419073885877302682", property.function_library_fingerprint = 3813812710040577091, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718920563.482788   11580 tpu_compilation_cache_interface.cc:541] After adding entry for key 3aa4aae2fe946fd1:0:0 with session_name  cache is 251 entries (49705100149 bytes),  marked for eviction 207 entries (41172550873 bytes).
I0000 00:



I0000 00:00:1718920605.090909   11594 tpu_compile_op_common.cc:245] Compilation of 515ac4a8372ae9ea:0:0 with session name  took 6.395554227s and succeeded
I0000 00:00:1718920605.147047   11594 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(515ac4a8372ae9ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9155811_11609325736428059317", property.function_library_fingerprint = 16249014819888548447, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718920605.147396   11594 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7318304181098938 and accuracy: 0.8445377945899963


[I 2024-06-20 21:57:00,796] Trial 303 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 5, 'dropout_rate': 0.25425324517482656, 'weight_decay': 0.005315073540174822, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.320710994702798}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718920889.611735   11539 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(529bf77151a3ae2a:0:0), session_name()
I0000 00:00:1718920937.393392   11539 tpu_compile_op_common.cc:245] Compilation of 529bf77151a3ae2a:0:0 with session name  took 47.781588504s and succeeded
I0000 00:00:1718920937.612509   11539 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(529bf77151a3ae2a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9303398_15060213617938568495", property.function_library_fingerprint = 1081443591070531380, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718920980.965911   11541 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c380165c84b98e4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9303398_15060213617938568495", property.function_library_fingerprint = 1081443591070531380, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718920980.967774   11541 tpu_compilation_cache_interface.cc:541] After adding entry for key c380165c84b98e4:0:0 with session_name  cache is 255 entries (50459578943 bytes),  marked for eviction 211 entries (41927028659 bytes).
I0000 00:00



I0000 00:00:1718921020.901798   11608 tpu_compile_op_common.cc:245] Compilation of 385f2ea6d7dc9863:0:0 with session name  took 6.370676889s and succeeded
I0000 00:00:1718921020.956967   11608 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(385f2ea6d7dc9863:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9340447_7400168928249803829", property.function_library_fingerprint = 3665013200298286440, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718921020.957213   11608 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7796542644500732 and accuracy: 0.8377100825309753


[I 2024-06-20 22:04:10,596] Trial 309 finished with value: 0.8377100825309753 and parameters: {'num_epochs': 8, 'dropout_rate': 0.23428158671331129, 'weight_decay': 0.03457634028508152, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5780639132405477}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718921316.751628   11566 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(56a0e57d6387e90e:0:0), session_name()
I0000 00:00:1718921362.751534   11566 tpu_compile_op_common.cc:245] Compilation of 56a0e57d6387e90e:0:0 with session name  took 45.999729741s and succeeded
I0000 00:00:1718921363.000409   11566 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(56a0e57d6387e90e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9493290_3224877218080627730", property.function_library_fingerprint = 11647318009278388772, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718921409.142492   11554 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b4c3d57467b9b247:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9493290_3224877218080627730", property.function_library_fingerprint = 11647318009278388772, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718921409.144077   11554 tpu_compilation_cache_interface.cc:541] After adding entry for key b4c3d57467b9b247:0:0 with session_name  cache is 259 entries (51214056729 bytes),  marked for eviction 215 entries (42681506493 bytes).
I0000 00:



I0000 00:00:1718921448.775531   11540 tpu_compile_op_common.cc:245] Compilation of 29951b7982f29f7f:0:0 with session name  took 6.390031468s and succeeded
I0000 00:00:1718921448.853463   11540 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29951b7982f29f7f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9530339_7748094171176088196", property.function_library_fingerprint = 15466563565523926950, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718921448.853826   11540 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7710116505622864 and accuracy: 0.8513655662536621


2024-06-20 22:11:35.457696: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718921497.358387   11555 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e6d14dd9a5f52ccf:0:0), session_name()
I0000 00:00:1718921503.178452   11555 tpu_compile_op_common.cc:245] Compilation of e6d14dd9a5f52ccf:0:0 with session name  took 5.820000526s and succeeded
I0000 00:00:1718921503.213784   11555 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6d14dd9a5f52ccf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15963618698850888960", property.function_library_fingerprint = 8518100611531797130, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718921516.512214   11587 tpu_compile_op_common.cc:245] Compilation of 67d53c1b04c6981f:0:0 with session name  took 6.155718814s and succeeded
I0000 00:00:1718921516.550120   11587 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(67d53c1b04c6981f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9554491_8549817682493316345", property.function_library_fingerprint = 8518100611531797130, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718921516.550304   11587 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/3


I0000 00:00:1718921613.486212   11529 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e6a8096bdb4e89e:0:0), session_name()
I0000 00:00:1718921694.145975   11529 tpu_compile_op_common.cc:245] Compilation of e6a8096bdb4e89e:0:0 with session name  took 1m20.659699491s and succeeded
I0000 00:00:1718921694.438433   11529 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6a8096bdb4e89e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9589996_7135978778794382324", property.function_library_fingerprint = 15422685153948593702, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_BERT_prePost_0_model_trial_189_accuracy_0.8482_avg_score_0.8058_f1_0.7635_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_189_accuracy_0.8482_avg_score_0.8058_f1_0.7635_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 22:16:19,434] Trial 315 finished with value: 0.8513655662536621 and parameters: {'num_epochs': 8, 'dropout_rate': 0.27837837411430194, 'weight_decay': 0.044585023042827424, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5915169243176196}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718922044.542863   11567 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(958832925b21614f:0:0), session_name()
I0000 00:00:1718922089.584084   11567 tpu_compile_op_common.cc:245] Compilation of 958832925b21614f:0:0 with session name  took 45.041128405s and succeeded
I0000 00:00:1718922089.775648   11567 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(958832925b21614f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9757757_10220210902023003882", property.function_library_fingerprint = 5889598054861723512, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718922159.950450   11564 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5b5489836ef10ade:0:0), session_name()
I0000 00:00:1718922166.968924   11564 tpu_compile_op_common.cc:245] Compilation of 5b5489836ef10ade:0:0 with session name  took 7.018407366s and succeeded
I0000 00:00:1718922167.032186   11564 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5b5489836ef10ade:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9794806_10606710399083943156", property.function_library_fingerprint = 15117019446198166973, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718922173.235806   11554 tpu_compile_op_common.cc:245] Compilation of 9bb33d018bdbe4d6:0:0 with session name  took 6.074014295s and succeeded
I0000 00:00:1718922173.279887   11554 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9bb33d018bdbe4d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9794806_10606710399083943156", property.function_library_fingerprint = 15117019446198166973, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718922173.280202   11554 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7808108925819397 and accuracy: 0.8450630307197571


[I 2024-06-20 22:23:21,941] Trial 322 finished with value: 0.8450630307197571 and parameters: {'num_epochs': 9, 'dropout_rate': 0.2220609475043064, 'weight_decay': 0.029675324923367052, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.542204640676467}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718922469.978552   11574 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4eb86dffb95e963b:0:0), session_name()
I0000 00:00:1718922518.740553   11574 tpu_compile_op_common.cc:245] Compilation of 4eb86dffb95e963b:0:0 with session name  took 48.761926158s and succeeded
I0000 00:00:1718922518.951827   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4eb86dffb95e963b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9949401_2797325301987301864", property.function_library_fingerprint = 16896484540742294725, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718922565.439904   11597 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(17ae06e8387aeb1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9949401_2797325301987301864", property.function_library_fingerprint = 16896484540742294725, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718922565.441231   11597 tpu_compilation_cache_interface.cc:541] After adding entry for key 17ae06e8387aeb1b:0:0 with session_name  cache is 272 entries (53791165011 bytes),  marked for eviction 228 entries (45248496611 bytes).
I0000 00:



I0000 00:00:1718922605.785836   11522 tpu_compile_op_common.cc:245] Compilation of 370ecec75b16efa9:0:0 with session name  took 6.38301533s and succeeded
I0000 00:00:1718922605.837165   11522 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(370ecec75b16efa9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9986450_16166383270810341016", property.function_library_fingerprint = 17272308134714993668, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718922605.837742   11522 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7794419527053833 and accuracy: 0.8408613204956055


[I 2024-06-20 22:30:34,471] Trial 327 finished with value: 0.8408613204956055 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2136991552409024, 'weight_decay': 0.04425376504804719, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5573792802359704}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718922901.822331   11552 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a429839c6dc2366c:0:0), session_name()
I0000 00:00:1718922947.594609   11552 tpu_compile_op_common.cc:245] Compilation of a429839c6dc2366c:0:0 with session name  took 45.772210866s and succeeded
I0000 00:00:1718922947.852291   11552 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a429839c6dc2366c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10139293_1560812430448070950", property.function_library_fingerprint = 13160932482260215121, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718923015.903592   11568 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(553f322447382b1a:0:0), session_name()
I0000 00:00:1718923022.349087   11568 tpu_compile_op_common.cc:245] Compilation of 553f322447382b1a:0:0 with session name  took 6.445444057s and succeeded
I0000 00:00:1718923022.395223   11568 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(553f322447382b1a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10176342_6185154472392468484", property.function_library_fingerprint = 3878843593513053113, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718923028.151807   11529 tpu_compile_op_common.cc:245] Compilation of f266e4a5aa64b865:0:0 with session name  took 5.64028216s and succeeded
I0000 00:00:1718923028.200625   11529 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f266e4a5aa64b865:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10176342_6185154472392468484", property.function_library_fingerprint = 3878843593513053113, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718923028.200874   11529 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7556495666503906 and accuracy: 0.8508403301239014


2024-06-20 22:37:52.046302: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718923073.980039   11556 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a84d6ab01406924b:0:0), session_name()
I0000 00:00:1718923079.556401   11556 tpu_compile_op_common.cc:245] Compilation of a84d6ab01406924b:0:0 with session name  took 5.576315759s and succeeded
I0000 00:00:1718923079.587880   11556 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a84d6ab01406924b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14966275891142878139", property.function_library_fingerprint = 10961747883538077879, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718923092.427327   11543 tpu_compile_op_common.cc:245] Compilation of 116ce4807ca55a24:0:0 with session name  took 5.476946951s and succeeded
I0000 00:00:1718923092.466155   11543 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(116ce4807ca55a24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10198742_11905739117444458620", property.function_library_fingerprint = 10961747883538077879, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718923092.466423   11543 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 1/2


I0000 00:00:1718923192.288250   11562 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6ba3164af1de39b0:0:0), session_name()
I0000 00:00:1718923266.658443   11562 tpu_compile_op_common.cc:245] Compilation of 6ba3164af1de39b0:0:0 with session name  took 1m14.370131828s and succeeded
I0000 00:00:1718923266.899223   11562 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6ba3164af1de39b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10234247_6573328248341824880", property.function_library_fingerprint = 10336748232241757548, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_

Epoch 2/2
File ./saved_models/disaster_BERT_prePost_0_model_trial_108_accuracy_0.8482_avg_score_0.7946_f1_0.7411_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_108_accuracy_0.8482_avg_score_0.7946_f1_0.7411_post_fine_tuning_submission.csv has been removed.


[I 2024-06-20 22:42:27,366] Trial 333 finished with value: 0.8508403301239014 and parameters: {'num_epochs': 7, 'dropout_rate': 0.2460953227202246, 'weight_decay': 0.0398445174080303, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.59590081742729}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718923617.195683   11574 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(db9a171b78473fbb:0:0), session_name()
I0000 00:00:1718923664.725340   11574 tpu_compile_op_common.cc:245] Compilation of db9a171b78473fbb:0:0 with session name  took 47.529583666s and succeeded
I0000 00:00:1718923664.945415   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db9a171b78473fbb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10401394_9499568234408426829", property.function_library_fingerprint = 10667110192267060849, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718923710.306845   11559 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c2bed6e1d52ba339:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10401394_9499568234408426829", property.function_library_fingerprint = 10667110192267060849, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718923710.308613   11559 tpu_compilation_cache_interface.cc:541] After adding entry for key c2bed6e1d52ba339:0:0 with session_name  cache is 285 entries (56368273437 bytes),  marked for eviction 242 entries (47912420980 bytes).
I0000 00



I0000 00:00:1718923750.305975   11585 tpu_compile_op_common.cc:245] Compilation of 83189a8e0fa20f72:0:0 with session name  took 6.300764877s and succeeded
I0000 00:00:1718923750.360452   11585 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(83189a8e0fa20f72:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10438443_15119011597187790063", property.function_library_fingerprint = 13821514587666626153, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718923750.360736   11585 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.780910074710846 and accuracy: 0.8434873819351196


[I 2024-06-20 22:49:40,806] Trial 341 finished with value: 0.8434873819351196 and parameters: {'num_epochs': 9, 'dropout_rate': 0.25128261502616256, 'weight_decay': 0.04071272495546133, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.30479411274526913}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718924049.381066   11545 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6cc9a3aa37780e79:0:0), session_name()
I0000 00:00:1718924098.323054   11545 tpu_compile_op_common.cc:245] Compilation of 6cc9a3aa37780e79:0:0 with session name  took 48.941900008s and succeeded
I0000 00:00:1718924098.524260   11545 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6cc9a3aa37780e79:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10593038_14385819213196921134", property.function_library_fingerprint = 13720764449867388244, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_s



I0000 00:00:1718924145.755577   11541 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ff82638075a7ce9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10593038_14385819213196921134", property.function_library_fingerprint = 13720764449867388244, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718924145.758435   11541 tpu_compilation_cache_interface.cc:541] After adding entry for key 9ff82638075a7ce9:0:0 with session_name  cache is 289 entries (57122751639 bytes),  marked for eviction 246 entries (48666752325 bytes).
I0000 0



I0000 00:00:1718924187.629125   11580 tpu_compile_op_common.cc:245] Compilation of ac29b38a0bcefba9:0:0 with session name  took 6.654845549s and succeeded
I0000 00:00:1718924187.686624   11580 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ac29b38a0bcefba9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10630087_8431991826511572282", property.function_library_fingerprint = 12476151381436370300, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718924187.686983   11580 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7848322987556458 and accuracy: 0.8408613204956055


[I 2024-06-20 22:56:53,168] Trial 346 finished with value: 0.8408613204956055 and parameters: {'num_epochs': 8, 'dropout_rate': 0.23100801661051718, 'weight_decay': 0.03416521502601492, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.6353204501462825}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718924478.375869   11544 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6f7a99be0f377620:0:0), session_name()
I0000 00:00:1718924524.516481   11544 tpu_compile_op_common.cc:245] Compilation of 6f7a99be0f377620:0:0 with session name  took 46.140535928s and succeeded
I0000 00:00:1718924524.712995   11544 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6f7a99be0f377620:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10782930_1364376212030863753", property.function_library_fingerprint = 4377559175828342493, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718924596.242181   11553 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b46b8519a128d668:0:0), session_name()
I0000 00:00:1718924603.219184   11553 tpu_compile_op_common.cc:245] Compilation of b46b8519a128d668:0:0 with session name  took 6.976944728s and succeeded
I0000 00:00:1718924603.271069   11553 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b46b8519a128d668:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10819979_13001765584792698221", property.function_library_fingerprint = 5724683490621937191, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718924609.486455   11524 tpu_compile_op_common.cc:245] Compilation of c204f76aaac1d5e3:0:0 with session name  took 6.086315668s and succeeded
I0000 00:00:1718924609.529860   11524 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c204f76aaac1d5e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10819979_13001765584792698221", property.function_library_fingerprint = 5724683490621937191, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718924609.530154   11524 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7581626772880554 and accuracy: 0.8403361439704895


[I 2024-06-20 23:03:58,156] Trial 352 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 9, 'dropout_rate': 0.2843221213614633, 'weight_decay': 0.044829774393849574, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.35361878463311497}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718924904.088573   11518 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eba394c80e45f955:0:0), session_name()
I0000 00:00:1718924951.950828   11518 tpu_compile_op_common.cc:245] Compilation of eba394c80e45f955:0:0 with session name  took 47.862195263s and succeeded
I0000 00:00:1718924952.208091   11518 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eba394c80e45f955:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10974716_3494485068097738132", property.function_library_fingerprint = 11620223539295875863, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718924997.182711   11580 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e900ea58c9225a3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10974716_3494485068097738132", property.function_library_fingerprint = 11620223539295875863, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718924997.185459   11580 tpu_compilation_cache_interface.cc:541] After adding entry for key 4e900ea58c9225a3:0:0 with session_name  cache is 297 entries (58631844525 bytes),  marked for eviction 254 entries (50175845779 bytes).
I0000 00



I0000 00:00:1718925035.653339   11532 tpu_compile_op_common.cc:245] Compilation of 62f84f88860f7474:0:0 with session name  took 5.68466425s and succeeded
I0000 00:00:1718925035.697591   11532 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(62f84f88860f7474:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11011807_10134723131730075872", property.function_library_fingerprint = 17510120538830156116, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718925035.698006   11532 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7627052664756775 and accuracy: 0.8471638560295105


[I 2024-06-20 23:10:59,446] Trial 358 finished with value: 0.8471638560295105 and parameters: {'num_epochs': 7, 'dropout_rate': 0.24209211444229026, 'weight_decay': 0.04869200767693029, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.36687883466004334}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718925323.653317   11572 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(16fcbea17c887db4:0:0), session_name()
I0000 00:00:1718925370.020635   11572 tpu_compile_op_common.cc:245] Compilation of 16fcbea17c887db4:0:0 with session name  took 46.367248288s and succeeded
I0000 00:00:1718925370.209168   11572 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(16fcbea17c887db4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11162898_2937150649086884388", property.function_library_fingerprint = 11130740120784738488, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718925413.060519   11587 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3ad46cbac31d54ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11162898_2937150649086884388", property.function_library_fingerprint = 11130740120784738488, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718925413.061182   11587 tpu_compilation_cache_interface.cc:541] After adding entry for key 3ad46cbac31d54ae:0:0 with session_name  cache is 301 entries (59386322407 bytes),  marked for eviction 258 entries (50930323909 bytes).
I0000 00



I0000 00:00:1718925451.779486   11585 tpu_compile_op_common.cc:245] Compilation of 8983c2796dcbfc5f:0:0 with session name  took 5.462255467s and succeeded
I0000 00:00:1718925451.822568   11585 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8983c2796dcbfc5f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11199947_6379801605812386711", property.function_library_fingerprint = 2114455594887964481, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718925451.822852   11585 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7722448110580444 and accuracy: 0.8413865566253662


[I 2024-06-20 23:17:54,072] Trial 363 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 7, 'dropout_rate': 0.24718221024695888, 'weight_decay': 0.04269224859414077, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.39028929368129456}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718925740.210447   11594 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(833174dcf20ecbc1:0:0), session_name()
I0000 00:00:1718925786.191874   11594 tpu_compile_op_common.cc:245] Compilation of 833174dcf20ecbc1:0:0 with session name  took 45.981353521s and succeeded
I0000 00:00:1718925786.383274   11594 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(833174dcf20ecbc1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11351038_12085516358547978583", property.function_library_fingerprint = 4447718350417240768, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718925856.141840   11548 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(15859cbe79a60f42:0:0), session_name()
I0000 00:00:1718925862.749143   11548 tpu_compile_op_common.cc:245] Compilation of 15859cbe79a60f42:0:0 with session name  took 6.607237312s and succeeded
I0000 00:00:1718925862.804621   11548 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(15859cbe79a60f42:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11388087_16880784239747618737", property.function_library_fingerprint = 3481446416080899821, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718925869.170484   11547 tpu_compile_op_common.cc:245] Compilation of bdf7677ecb2d1bc9:0:0 with session name  took 6.220172946s and succeeded
I0000 00:00:1718925869.224128   11547 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bdf7677ecb2d1bc9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11388087_16880784239747618737", property.function_library_fingerprint = 3481446416080899821, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718925869.224676   11547 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/3
Epoch 3/3
f1 score: 0.7152396440505981 and accuracy: 0.8413865566253662


[I 2024-06-20 23:24:38,728] Trial 368 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 3, 'dropout_rate': 0.2537304682294199, 'weight_decay': 0.038809788833849806, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3147856742690571}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718926144.492233   11531 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e92e9977ca76513a:0:0), session_name()
I0000 00:00:1718926192.714171   11531 tpu_compile_op_common.cc:245] Compilation of e92e9977ca76513a:0:0 with session name  took 48.22186381s and succeeded
I0000 00:00:1718926192.915461   11531 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e92e9977ca76513a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11532312_18271371485420583534", property.function_library_fingerprint = 17973497790738297895, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718926238.506910   11540 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(61b94e4f810a2f54:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11532312_18271371485420583534", property.function_library_fingerprint = 17973497790738297895, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718926238.509182   11540 tpu_compilation_cache_interface.cc:541] After adding entry for key 61b94e4f810a2f54:0:0 with session_name  cache is 309 entries (60895415613 bytes),  marked for eviction 265 entries (52351709937 bytes).
I0000 0



I0000 00:00:1718926280.021146   11517 tpu_compile_op_common.cc:245] Compilation of 57426a95545f3639:0:0 with session name  took 6.669051955s and succeeded
I0000 00:00:1718926280.078711   11517 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(57426a95545f3639:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11569403_9792710110278427798", property.function_library_fingerprint = 7014748338163507599, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718926280.079028   11517 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7708142995834351 and accuracy: 0.8177521228790283


[I 2024-06-20 23:31:45,315] Trial 374 finished with value: 0.8177521228790283 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2598813938967126, 'weight_decay': 0.031948856236400626, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.23384737112952408}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718926575.242171   11528 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2f5e909963ca0f24:0:0), session_name()
I0000 00:00:1718926623.049385   11528 tpu_compile_op_common.cc:245] Compilation of 2f5e909963ca0f24:0:0 with session name  took 47.807126609s and succeeded
I0000 00:00:1718926623.291554   11528 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2f5e909963ca0f24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11722388_10208168302965627374", property.function_library_fingerprint = 329054682268952317, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718926694.069066   11519 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9379c8a301de6f71:0:0), session_name()
I0000 00:00:1718926701.278214   11519 tpu_compile_op_common.cc:245] Compilation of 9379c8a301de6f71:0:0 with session name  took 7.209094915s and succeeded
I0000 00:00:1718926701.332975   11519 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9379c8a301de6f71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11759479_12169899707925504022", property.function_library_fingerprint = 15457084734825635977, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si



I0000 00:00:1718926708.047650   11550 tpu_compile_op_common.cc:245] Compilation of 11bbae89759f3d97:0:0 with session name  took 6.598127519s and succeeded
I0000 00:00:1718926708.096045   11550 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(11bbae89759f3d97:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11759479_12169899707925504022", property.function_library_fingerprint = 15457084734825635977, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718926708.096369   11550 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7667794823646545 and accuracy: 0.8340336084365845


[I 2024-06-20 23:38:56,711] Trial 380 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 9, 'dropout_rate': 0.2935853776573657, 'weight_decay': 0.045602392822163926, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.569846504803406}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718927003.512966   11518 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b6409451bd371c8f:0:0), session_name()
I0000 00:00:1718927049.783686   11518 tpu_compile_op_common.cc:245] Compilation of b6409451bd371c8f:0:0 with session name  took 46.270622971s and succeeded
I0000 00:00:1718927049.996341   11518 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b6409451bd371c8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11914216_8088053797260292998", property.function_library_fingerprint = 11657123676131282943, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718927121.478545   11574 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(58e191150637e18c:0:0), session_name()
I0000 00:00:1718927128.881748   11574 tpu_compile_op_common.cc:245] Compilation of 58e191150637e18c:0:0 with session name  took 7.403113198s and succeeded
I0000 00:00:1718927128.938975   11574 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(58e191150637e18c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11951307_2022216750402612331", property.function_library_fingerprint = 13841396921007562287, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718927135.633738   11585 tpu_compile_op_common.cc:245] Compilation of dd02e9cfd0740ff:0:0 with session name  took 6.537950997s and succeeded
I0000 00:00:1718927135.688159   11585 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd02e9cfd0740ff:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11951307_2022216750402612331", property.function_library_fingerprint = 13841396921007562287, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718927135.688476   11585 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.758586585521698 and accuracy: 0.8392857313156128


[I 2024-06-20 23:46:04,845] Trial 385 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 9, 'dropout_rate': 0.2762264316380801, 'weight_decay': 0.04116235184526799, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.33442473953428414}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718927430.950362   11558 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f729e3a388f74669:0:0), session_name()
I0000 00:00:1718927477.534158   11558 tpu_compile_op_common.cc:245] Compilation of f729e3a388f74669:0:0 with session name  took 46.583724s and succeeded
I0000 00:00:1718927477.744510   11558 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f729e3a388f74669:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12105902_16214988975079591942", property.function_library_fingerprint = 18188121825478067597, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718927551.911789   11547 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f74ce75eef5a6e6:0:0), session_name()
I0000 00:00:1718927559.669815   11547 tpu_compile_op_common.cc:245] Compilation of f74ce75eef5a6e6:0:0 with session name  took 7.757953846s and succeeded
I0000 00:00:1718927559.727413   11547 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f74ce75eef5a6e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12142951_15959525549842642721", property.function_library_fingerprint = 15082998867294336824, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718927566.405024   11579 tpu_compile_op_common.cc:245] Compilation of 149adeeb83bce0d4:0:0 with session name  took 6.553129481s and succeeded
I0000 00:00:1718927566.462225   11579 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(149adeeb83bce0d4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12142951_15959525549842642721", property.function_library_fingerprint = 15082998867294336824, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718927566.462844   11579 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7708663940429688 and accuracy: 0.8445377945899963


[I 2024-06-20 23:53:15,312] Trial 391 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 8, 'dropout_rate': 0.23706360618244376, 'weight_decay': 0.040586740408826684, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.2814424220081528}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718927861.302366   11582 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a00bcb37c8fcadb:0:0), session_name()
I0000 00:00:1718927910.660658   11582 tpu_compile_op_common.cc:245] Compilation of a00bcb37c8fcadb:0:0 with session name  took 49.358213708s and succeeded
I0000 00:00:1718927910.872652   11582 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a00bcb37c8fcadb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12295794_3808737550197121312", property.function_library_fingerprint = 18272266458525610231, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718927955.758388   11593 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(69388a28a23a032f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12295794_3808737550197121312", property.function_library_fingerprint = 18272266458525610231, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718927955.759598   11593 tpu_compilation_cache_interface.cc:541] After adding entry for key 69388a28a23a032f:0:0 with session_name  cache is 325 entries (63913602305 bytes),  marked for eviction 280 entries (55538085338 bytes).
I0000 00



I0000 00:00:1718927996.412089   11589 tpu_compile_op_common.cc:245] Compilation of e9cb988b0ed1ab1a:0:0 with session name  took 6.729301721s and succeeded
I0000 00:00:1718927996.462562   11589 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e9cb988b0ed1ab1a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12332843_7132765862769128607", property.function_library_fingerprint = 10628041084717993731, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718927996.463050   11589 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7675396203994751 and accuracy: 0.8497899174690247


2024-06-21 00:00:50.006397: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718928052.078819   11516 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5f7cbb3033f74ce9:0:0), session_name()
I0000 00:00:1718928057.884515   11516 tpu_compile_op_common.cc:245] Compilation of 5f7cbb3033f74ce9:0:0 with session name  took 5.805627755s and succeeded
I0000 00:00:1718928057.921616   11516 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5f7cbb3033f74ce9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8768249220389015480", property.function_library_fingerprint = 13608318253526552380, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718928072.490602   11525 tpu_compile_op_common.cc:245] Compilation of f5e1be50106b4478:0:0 with session name  took 6.658470735s and succeeded
I0000 00:00:1718928072.550323   11525 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f5e1be50106b4478:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12360499_11511812910862939951", property.function_library_fingerprint = 13608318253526552380, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718928072.550642   11525 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 1/3


I0000 00:00:1718928171.589659   11605 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d4685e01bb8ca8:0:0), session_name()
I0000 00:00:1718928259.911338   11605 tpu_compile_op_common.cc:245] Compilation of d4685e01bb8ca8:0:0 with session name  took 1m28.321622379s and succeeded
I0000 00:00:1718928260.183728   11605 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d4685e01bb8ca8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12396004_3321315940334505770", property.function_library_fingerprint = 10985111457378373896, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_BERT_prePost_0_model_trial_71_accuracy_0.8487_avg_score_0.8047_f1_0.7607_pre_fine_tuning_submission.csv has been removed.
File ./saved_models/disaster_BERT_prePost_0_model_trial_71_accuracy_0.8487_avg_score_0.8047_f1_0.7607_post_fine_tuning_submission.csv has been removed.


[I 2024-06-21 00:05:48,408] Trial 396 finished with value: 0.8497899174690247 and parameters: {'num_epochs': 10, 'dropout_rate': 0.3097037147570001, 'weight_decay': 0.043599227684864676, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.33508186384553973}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718928616.467104   11554 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1ef7174c776aa27f:0:0), session_name()
I0000 00:00:1718928664.950388   11554 tpu_compile_op_common.cc:245] Compilation of 1ef7174c776aa27f:0:0 with session name  took 48.483207744s and succeeded
I0000 00:00:1718928665.206176   11554 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ef7174c776aa27f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12563765_2583162547070066733", property.function_library_fingerprint = 17287614634508901974, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718928709.686798   11587 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(23eddeaf242491cc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12563765_2583162547070066733", property.function_library_fingerprint = 17287614634508901974, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718928709.688432   11587 tpu_compilation_cache_interface.cc:541] After adding entry for key 23eddeaf242491cc:0:0 with session_name  cache is 334 entries (65736233217 bytes),  marked for eviction 290 entries (57202356993 bytes).
I0000 00



I0000 00:00:1718928749.034060   11531 tpu_compile_op_common.cc:245] Compilation of faa27e15476c4919:0:0 with session name  took 6.492024807s and succeeded
I0000 00:00:1718928749.096853   11531 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(faa27e15476c4919:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12600814_11384983049916948947", property.function_library_fingerprint = 2689517096781022181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718928749.097142   11531 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7579582929611206 and accuracy: 0.8445377945899963


[I 2024-06-21 00:12:59,739] Trial 404 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 9, 'dropout_rate': 0.3080046091201022, 'weight_decay': 0.04609676542843727, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.35458922672689225}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718929042.616472   11524 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1cad10eec069db87:0:0), session_name()
I0000 00:00:1718929090.678964   11524 tpu_compile_op_common.cc:245] Compilation of 1cad10eec069db87:0:0 with session name  took 48.062420042s and succeeded
I0000 00:00:1718929090.916723   11524 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1cad10eec069db87:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12755409_87754296150788094", property.function_library_fingerprint = 9001945042606840299, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718929136.593279   11597 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d14d80f2e4a5d676:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12755409_87754296150788094", property.function_library_fingerprint = 9001945042606840299, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718929136.595659   11597 tpu_compilation_cache_interface.cc:541] After adding entry for key d14d80f2e4a5d676:0:0 with session_name  cache is 338 entries (66490711051 bytes),  marked for eviction 294 entries (57956834851 bytes).
I0000 00:00



I0000 00:00:1718929176.202514   11567 tpu_compile_op_common.cc:245] Compilation of 57f91412aab8ec02:0:0 with session name  took 6.218638431s and succeeded
I0000 00:00:1718929176.248809   11567 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(57f91412aab8ec02:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12792458_1552573559876546570", property.function_library_fingerprint = 7324090389543902448, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718929176.249199   11567 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7738093733787537 and accuracy: 0.8392857313156128


[I 2024-06-21 00:20:12,832] Trial 410 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2972741767434185, 'weight_decay': 0.0447016410599846, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.35145959098974866}. Best is trial 112 with value: 0.8539915680885315.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8
