# This notebook uses the TPU on Kaggle and also uses the KeyWord argument

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/certification/BaltimoreCyberTrustRoot.crt.pem
/kaggle/input/nlp-getting-started/sample_submission.csv
/kaggle/input/nlp-getting-started/train.csv
/kaggle/input/nlp-getting-started/test.csv


In [None]:
import numpy as np
import pandas as pd
import random
import os
import re
import json
from transformers import set_seed, BertTokenizer, TFBertForSequenceClassification, BertConfig
import tensorflow as tf
%pip install optuna
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
set_seed(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Install necessary packages for Azure SQL connection
%pip install mysql-connector-python 
%pip install PyMySQL

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress other warnings (commented out for now)
# warnings.filterwarnings('ignore')

# Suppress absl TPU cache logging
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Additional configuration to suppress specific TPU cache logs
tf.autograph.set_verbosity(3)
tf.get_logger().propagate = False

# Load the training data
train_data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')
kaggle_test_data = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Split the data into 75% training and 25% validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42, stratify=train_data['target'])

# Clean the text data
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)     # Remove mentions
    text = re.sub(r'\d+', '', text)      # Remove numbers
    text = re.sub(r'[^\w\s#]', '', text)  # Remove punctuation except hashtags
    text = text.lower()                  # Convert to lowercase
    return text

train_data['clean_text'] = train_data['text'].apply(clean_text)
val_data['clean_text'] = val_data['text'].apply(clean_text)
kaggle_test_data['clean_text'] = kaggle_test_data['text'].apply(clean_text)

# Function to combine keyword and text
def combine_keyword_and_text(row):
    keyword = str(row['keyword']) if pd.notna(row['keyword']) else ''
    text = row['clean_text']
    return '[CLS] ' + keyword + ' [SEP] ' + text + ' [SEP]'

# Apply the function to combine keyword and text
train_data['combined_text'] = train_data.apply(combine_keyword_and_text, axis=1)
val_data['combined_text'] = val_data.apply(combine_keyword_and_text, axis=1)
kaggle_test_data['combined_text'] = kaggle_test_data.apply(combine_keyword_and_text, axis=1)

# Tokenize the text data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=64,
        padding=True,
        truncation=True,
        return_tensors='tf'
    )

# Encode the combined text data
train_encodings = tokenize_texts(train_data['combined_text'])
val_encodings = tokenize_texts(val_data['combined_text'])
kaggle_test_encodings = tokenize_texts(kaggle_test_data['combined_text'])

train_labels = tf.convert_to_tensor(train_data['target'].values)
val_labels = tf.convert_to_tensor(val_data['target'].values)

def compute_metrics(predictions, labels):
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(labels, predictions)
    accuracy = accuracy_score(labels, predictions)
    return {'f1': f1, 'accuracy': accuracy}

def create_tf_dataset(encodings, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((encodings, labels))
    return dataset.shuffle(10000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

# Initialize TPU
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    tpu_cores = strategy.num_replicas_in_sync
    print(f"TPU cores available: {tpu_cores}")
except ValueError:
    print("TPU not found")
    raise SystemExit

# Directory to save models
model_save_dir = './saved_models'
os.makedirs(model_save_dir, exist_ok=True)

# File to store top 5 model predictions
top_predictions_file = 'top_5_predictions.json'

# Load existing top 5 predictions
if os.path.exists(top_predictions_file):
    with open(top_predictions_file, 'r') as file:
        top_predictions = json.load(file)
else:
    top_predictions = []

# Function to save top predictions
def save_top_predictions(predictions_file, val_accuracy, model_number):
    global top_predictions
    
    new_entry = {
        'model_number': model_number,
        'val_accuracy': val_accuracy,
        'predictions_file': predictions_file
    }
    
    # Add the new entry and sort by validation accuracy
    top_predictions.append(new_entry)
    top_predictions = sorted(top_predictions, key=lambda x: x['val_accuracy'], reverse=True)
    
    # If there are more than 5 entries, remove the one with the lowest accuracy
    if len(top_predictions) > 5:
        removed_entry = top_predictions.pop()
        # Check if the file exists before attempting to remove it
        if os.path.exists(removed_entry['predictions_file']):
            os.remove(removed_entry['predictions_file'])
            print(f"File {removed_entry['predictions_file']} has been removed.")
        else:
            print(f"File {removed_entry['predictions_file']} does not exist and cannot be removed.")
    
    # Save the updated top predictions to file
    with open(top_predictions_file, 'w') as file:
        json.dump(top_predictions, file, indent=4)

# Track top 5 models
def objective(trial):
    # Set base learning rate and base batch size per core
    base_learning_rate = 1e-5
    base_batch_size_per_core = 16  # Change this to a smaller value for initial testing
    batch_size_per_core = trial.suggest_categorical("batch_size_per_core", [16, 32, 64, 128])
    batch_size = batch_size_per_core * tpu_cores
    learning_rate = base_learning_rate * (batch_size / (base_batch_size_per_core * tpu_cores))
    
    num_epochs = trial.suggest_int("num_epochs", 1, 20)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    lr_scheduler_type = trial.suggest_categorical("lr_scheduler_type", ["constant", "linear", "cosine", "cosine_with_restarts"])

    train_dataset = create_tf_dataset(dict(train_encodings), train_labels, batch_size)
    val_dataset = create_tf_dataset(dict(val_encodings), val_labels, batch_size)
    kaggle_test_dataset = tf.data.Dataset.from_tensor_slices(dict(kaggle_test_encodings)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    with strategy.scope():
        precision = tf.keras.metrics.Precision()
        recall = tf.keras.metrics.Recall()

        def f1_score_custom(y_true, y_pred):
            # Convert logits to predicted labels
            y_pred = tf.argmax(y_pred, axis=1)
            
            # Ensure true labels are in integer format
            y_true = tf.cast(y_true, tf.int64)
            
            # Update the state of precision and recall
            precision.update_state(y_true, y_pred)
            recall.update_state(y_true, y_pred)
            
            # Compute precision and recall values
            precision_result = precision.result()
            recall_result = recall.result()
            
            # Compute F1 score
            f1 = 2 * ((precision_result * recall_result) / (precision_result + recall_result + tf.keras.backend.epsilon()))
            
            return f1

        config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2, hidden_dropout_prob=dropout_rate)
        model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

        if lr_scheduler_type == "linear":
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000,
                end_learning_rate=0.0,
                power=1.0
            )
        elif lr_scheduler_type == "cosine":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000
            )
        elif lr_scheduler_type == "cosine_with_restarts":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                initial_learning_rate=learning_rate,
                first_decay_steps=1000
            )
        else:
            lr_schedule = learning_rate

        optimizer = tf.keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            epsilon=1e-8
        )

        model.compile(
            optimizer=optimizer, 
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy'), f1_score_custom],
            steps_per_execution=32  # Experiment with different values like 16, 32, 64
        )

    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)

    # Evaluate on validation set
    val_loss, val_accuracy, val_f1_score = model.evaluate(val_dataset, verbose=1)
    print(f"f1 score: {val_f1_score} and accuracy: {val_accuracy}")
    
    avg_score = (val_accuracy + val_f1_score) / 2

    # If the model is in the top 5, proceed with fine-tuning and saving predictions
    if len(top_predictions) < 5 or val_accuracy > min(top_predictions, key=lambda x: x['val_accuracy'])['val_accuracy']:
        fine_tune_encodings = tokenize_texts(val_data['combined_text'])
        fine_tune_labels = tf.convert_to_tensor(val_data['target'].values)
        fine_tune_dataset = tf.data.Dataset.from_tensor_slices((
            dict(fine_tune_encodings),
            fine_tune_labels
        )).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

        # Calculate the ratio of training data size to epochs
        training_data_size = len(train_data)
        fine_tune_data_size = len(val_data)
        fine_tune_epochs = max(1, round((fine_tune_data_size / training_data_size) * num_epochs))

        model.fit(fine_tune_dataset, epochs=fine_tune_epochs, verbose=1)
        
        # Make predictions on the Kaggle test dataset
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()

        # Save the predictions
        predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}" + '_submission.csv') 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(predictions_file, index=False)
    
        # Save the predictions and accuracy to the top 5 list
        save_top_predictions(predictions_file, val_accuracy, trial.number)

    return val_accuracy

# Define your Optuna study, using the MySQL connection string
optuna_storage = 'mysql+pymysql://<username>:<password>@<host>/<database>?ssl_ca=<path_to_CA_cert>&ssl_verify_cert=true'

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
db_password = user_secrets.get_secret("DB_PASSWORD")# This uses the secrets inside of Kaggle so I don't have to explicitly type my password out in code

# Example with your details (replace '<password>' with your real password and '<database>' with your database name)
optuna_storage = f'mysql+pymysql://MichaelAzure:{db_password}@kaggle-third-sql.mysql.database.azure.com/kaggle_disaster_database?ssl_ca=/kaggle/input/certification&ssl_verify_cert=true'

studyName = 'disaster_TPU_withKeyword_1'
study = optuna.create_study(study_name=studyName, # name of the study
                            storage=optuna_storage,  # URL for the mySQL schema
                            direction='maximize', # maximize the log loss
                            load_if_exists=True, # makes it so that if the study_name already exists in the schema, then it will append the new trials with the old trials and essentially resume the study. It will also remember the previous trials so it really is resuming the study
                            )

study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save top predictions JSON file to output directory for later use
output_top_predictions_file = '/kaggle/working/top_5_predictions.json'
with open(output_top_predictions_file, 'w') as file:
    json.dump(top_predictions, file, indent=4)

print(f"Top 5 predictions saved to {output_top_predictions_file}")


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1718315452.695869    5476 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="localhost"
=== Source Location Trace: ===
learning/45eac/tfrc/runtime/common_lib.cc:479
D0613 21:50:52.704648359    5476 config.cc:196]                        gRPC EXPERIMENT call_status_override_on_cancellation   OFF (default:OFF)
D0613 21:50:52.704665209    5476 config.cc:196]                        gRPC EXPERIMENT call_v3                                OFF (default:OFF)
D0613 21:50:52.704668881    5476 config.cc:196]                        gRPC EXPERIMENT canary_client_privacy                  ON  (default:ON)
D0613 21:50:52.704671421    5476 config.cc:196]                        gRPC EXPERIMENT capture_base_context                   ON  (default:ON)
D0613 21:50:52.704673914    5476 config.cc:196]                        gRPC EXPERI

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


I0000 00:00:1718315472.236799    5476 service.cc:145] XLA service 0x598c0e695980 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1718315472.236860    5476 service.cc:153]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1718315472.236865    5476 service.cc:153]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1718315472.236868    5476 service.cc:153]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1718315472.236871    5476 service.cc:153]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1718315472.236873    5476 service.cc:153]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1718315472.236876    5476 service.cc:153]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1718315472.236879    5476 service.cc:153]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1718315472.236883    5476 service.cc:153]   StreamExecutor device (7): TPU, 2a886c8


TPU cores available: 8


[I 2024-06-13 21:51:22,087] Using an existing study with name 'disaster_TPU_withKeyword_1' instead of creating a new one.
I0000 00:00:1718315485.281287    5476 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


I0000 00:00:1718315741.023402    6297 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c357089a7e6069ad:0:0), session_name()
I0000 00:00:1718315776.183966    6297 tpu_compile_op_common.cc:245] Compilation of c357089a7e6069ad:0:0 with session name  took 35.160498356s and succeeded
I0000 00:00:1718315776.345972    6297 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c357089a7e6069ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_123810_16841913849719589554", property.function_library_fingerprint = 3484322206690488708, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,50,;64,50,;64,50,;64,;", property.guaranteed_constants_size



I0000 00:00:1718315834.282452    6304 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6301397a6ef1780a:0:0), session_name()
I0000 00:00:1718315840.279120    6304 tpu_compile_op_common.cc:245] Compilation of 6301397a6ef1780a:0:0 with session name  took 5.996624198s and succeeded
I0000 00:00:1718315840.319458    6304 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6301397a6ef1780a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_157484_9863256191019750481", property.function_library_fingerprint = 10153545275897623449, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,48,;64,48,;64,48,;64,;", property.guaranteed_constants_size 



I0000 00:00:1718315846.828446    6311 tpu_compile_op_common.cc:245] Compilation of aba4e1ad30b1f7c6:0:0 with session name  took 6.432138005s and succeeded
I0000 00:00:1718315846.872079    6311 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aba4e1ad30b1f7c6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_157484_9863256191019750481", property.function_library_fingerprint = 10153545275897623449, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "46,48,;46,48,;46,48,;46,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718315846.872350    6311 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7875206470489502 and accuracy: 0.825630247592926
Epoch 1/4


I0000 00:00:1718315975.991666    6308 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(22723d2eb80bc809:0:0), session_name()
I0000 00:00:1718316033.685581    6308 tpu_compile_op_common.cc:245] Compilation of 22723d2eb80bc809:0:0 with session name  took 57.693866706s and succeeded
I0000 00:00:1718316033.942532    6308 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22723d2eb80bc809:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_212658_1096700596127748243", property.function_library_fingerprint = 2705607908251889227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,48,;64,48,;64,48,;64,;", property.guaranteed_constants_size 

Epoch 2/4
Epoch 3/4
Epoch 4/4


2024-06-13 22:01:51.545812: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718316113.333937    6285 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8ab8e2a432c9e095:0:0), session_name()
I0000 00:00:1718316118.418365    6285 tpu_compile_op_common.cc:245] Compilation of 8ab8e2a432c9e095:0:0 with session name  took 5.084374186s and succeeded
I0000 00:00:1718316118.449931    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ab8e2a432c9e095:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_12328312093152189029", property.function_library_fingerprint = 9262608898891195744, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718316130.090724    6312 tpu_compile_op_common.cc:245] Compilation of 30f5f2fa09c0ce7e:0:0 with session name  took 5.533728339s and succeeded
I0000 00:00:1718316130.123069    6312 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(30f5f2fa09c0ce7e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_248018_7935150944861925200", property.function_library_fingerprint = 9262608898891195744, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718316130.123298    6312 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 1/18


I0000 00:00:1718316376.437643    6259 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1e105742ce604353:0:0), session_name()
I0000 00:00:1718316409.025618    6259 tpu_compile_op_common.cc:245] Compilation of 1e105742ce604353:0:0 with session name  took 32.587921519s and succeeded
I0000 00:00:1718316409.219538    6259 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1e105742ce604353:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_376278_239569079257239265", property.function_library_fingerprint = 9129052386303022207, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size =



I0000 00:00:1718316411.230885    6297 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c25707e756222607:0:0), session_name()
I0000 00:00:1718316441.701449    6297 tpu_compile_op_common.cc:245] Compilation of c25707e756222607:0:0 with session name  took 30.470522266s and succeeded
I0000 00:00:1718316441.861135    6297 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c25707e756222607:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_376278_239569079257239265", property.function_library_fingerprint = 9129052386303022207, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size =



I0000 00:00:1718316468.131889    6302 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1208410de4dbc54:0:0), session_name()
I0000 00:00:1718316473.247757    6302 tpu_compile_op_common.cc:245] Compilation of 1208410de4dbc54:0:0 with session name  took 5.115826866s and succeeded
I0000 00:00:1718316473.286529    6302 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1208410de4dbc54:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_409944_2578464456116027874", property.function_library_fingerprint = 7510361866586262479, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718316478.593625    6245 tpu_compile_op_common.cc:245] Compilation of a25f851cb2ad025:0:0 with session name  took 5.155790607s and succeeded
I0000 00:00:1718316478.626283    6245 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a25f851cb2ad025:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_409944_2578464456116027874", property.function_library_fingerprint = 7510361866586262479, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718316478.626463    6245 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
f1 score: 0.8602474331855774 and accuracy: 0.8082982897758484
Epoch 1/6


I0000 00:00:1718316635.410122    6300 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cf7858b2c5ce7b46:0:0), session_name()
I0000 00:00:1718316696.360624    6300 tpu_compile_op_common.cc:245] Compilation of cf7858b2c5ce7b46:0:0 with session name  took 1m0.950443225s and succeeded
I0000 00:00:1718316696.564692    6300 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cf7858b2c5ce7b46:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_474714_18181632279295391062", property.function_library_fingerprint = 3396504309572527516, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718316759.793402    6283 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b926c99e56d48339:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_474714_18181632279295391062", property.function_library_fingerprint = 3396504309572527516, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718316759.794564    6283 tpu_compilation_cache_interface.cc:541] After adding entry for key b926c99e56d48339:0:0 with session_name  cache is 15 entries (3346116755 bytes),  marked for eviction 0 entries (0 bytes).


Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


2024-06-13 22:13:06.635371: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718316788.385135    6227 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(971271a1d24b6b15:0:0), session_name()
I0000 00:00:1718316792.500208    6227 tpu_compile_op_common.cc:245] Compilation of 971271a1d24b6b15:0:0 with session name  took 4.115024988s and succeeded
I0000 00:00:1718316792.522748    6227 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(971271a1d24b6b15:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4975846227887757846", property.function_library_fingerprint = 5310257886690688981, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718316802.187704    6261 tpu_compile_op_common.cc:245] Compilation of 44dad7d0125e19e7:0:0 with session name  took 4.421428119s and succeeded
I0000 00:00:1718316802.223655    6261 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(44dad7d0125e19e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_511240_12296131966115110467", property.function_library_fingerprint = 5310257886690688981, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718316802.223930    6261 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 1/8


I0000 00:00:1718317048.141661    6229 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7fde927a7ccdaf77:0:0), session_name()
I0000 00:00:1718317081.035541    6229 tpu_compile_op_common.cc:245] Compilation of 7fde927a7ccdaf77:0:0 with session name  took 32.89382602s and succeeded
I0000 00:00:1718317081.230210    6229 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7fde927a7ccdaf77:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_639738_13358422699041971806", property.function_library_fingerprint = 7746053024361954720, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants_s



I0000 00:00:1718317138.703163    6303 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(323bcf37374fa112:0:0), session_name()
I0000 00:00:1718317144.512295    6303 tpu_compile_op_common.cc:245] Compilation of 323bcf37374fa112:0:0 with session name  took 5.809084955s and succeeded
I0000 00:00:1718317144.560988    6303 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(323bcf37374fa112:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_673412_4938931603735164876", property.function_library_fingerprint = 11183999981532112249, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_s



I0000 00:00:1718317150.651471    6236 tpu_compile_op_common.cc:245] Compilation of d1f020dc6c118e6:0:0 with session name  took 6.03249441s and succeeded
I0000 00:00:1718317150.695962    6236 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d1f020dc6c118e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_673412_4938931603735164876", property.function_library_fingerprint = 11183999981532112249, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718317150.696155    6236 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7594338655471802 and accuracy: 0.8445377945899963
Epoch 1/3


I0000 00:00:1718317261.758582    6234 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(db4cbcbe0e38c66c:0:0), session_name()
I0000 00:00:1718317319.576117    6234 tpu_compile_op_common.cc:245] Compilation of db4cbcbe0e38c66c:0:0 with session name  took 57.817467481s and succeeded
I0000 00:00:1718317319.800723    6234 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db4cbcbe0e38c66c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_719826_2471933716365567195", property.function_library_fingerprint = 353735653234428310, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_si

Epoch 2/3
Epoch 3/3


2024-06-13 22:23:20.381413: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718317402.117285    6222 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4acad6340c35deb7:0:0), session_name()
I0000 00:00:1718317407.934941    6222 tpu_compile_op_common.cc:245] Compilation of 4acad6340c35deb7:0:0 with session name  took 5.817606341s and succeeded
I0000 00:00:1718317407.968507    6222 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4acad6340c35deb7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8784009385772568876", property.function_library_fingerprint = 10973622506467270205, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718317419.256722    6228 tpu_compile_op_common.cc:245] Compilation of a3135c3617a1cd85:0:0 with session name  took 5.051246635s and succeeded
I0000 00:00:1718317419.297334    6228 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a3135c3617a1cd85:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_754572_8459493762068326890", property.function_library_fingerprint = 10973622506467270205, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718317419.297567    6228 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/19


I0000 00:00:1718317664.703530    6285 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3fbd850cc9ade7e3:0:0), session_name()
I0000 00:00:1718317698.322871    6285 tpu_compile_op_common.cc:245] Compilation of 3fbd850cc9ade7e3:0:0 with session name  took 33.619292947s and succeeded
I0000 00:00:1718317698.525585    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3fbd850cc9ade7e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_882912_13469072923008531136", property.function_library_fingerprint = 18266969518177680543, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants



I0000 00:00:1718317754.247207    6301 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a7858b55428be03:0:0), session_name()
I0000 00:00:1718317760.483825    6301 tpu_compile_op_common.cc:245] Compilation of a7858b55428be03:0:0 with session name  took 6.236546032s and succeeded
I0000 00:00:1718317760.530641    6301 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a7858b55428be03:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_916540_9318548775145719675", property.function_library_fingerprint = 14023153366295683577, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_size



I0000 00:00:1718317766.562217    6223 tpu_compile_op_common.cc:245] Compilation of 3c2d2dbe475e207c:0:0 with session name  took 5.948867302s and succeeded
I0000 00:00:1718317766.605744    6223 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3c2d2dbe475e207c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_916540_9318548775145719675", property.function_library_fingerprint = 14023153366295683577, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718317766.605943    6223 tpu_compilation_cache_interface.cc:541] After adding e

Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
f1 score: 0.8843141794204712 and accuracy: 0.8114495873451233
Epoch 1/6


I0000 00:00:1718317914.316408    6254 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a370cb866cbdcb0a:0:0), session_name()
I0000 00:00:1718317970.770419    6254 tpu_compile_op_common.cc:245] Compilation of a370cb866cbdcb0a:0:0 with session name  took 56.453950165s and succeeded
I0000 00:00:1718317970.982955    6254 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a370cb866cbdcb0a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_982184_12494193245741658051", property.function_library_fingerprint = 9047090237125271553, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


2024-06-13 22:34:17.989655: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718318059.853194    6273 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(af17618b3161084e:0:0), session_name()
I0000 00:00:1718318065.754432    6273 tpu_compile_op_common.cc:245] Compilation of af17618b3161084e:0:0 with session name  took 5.901196516s and succeeded
I0000 00:00:1718318065.782136    6273 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af17618b3161084e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13764418614754354089", property.function_library_fingerprint = 13818046275385326520, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718318077.737500    6255 tpu_compile_op_common.cc:245] Compilation of 1a9f1705bd69c49a:0:0 with session name  took 5.213203088s and succeeded
I0000 00:00:1718318077.781258    6255 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1a9f1705bd69c49a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1018726_13796268037317896196", property.function_library_fingerprint = 13818046275385326520, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718318077.781550    6255 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 1/7


I0000 00:00:1718318323.939451    6255 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7d4c2ad82e4516d5:0:0), session_name()
I0000 00:00:1718318360.807941    6255 tpu_compile_op_common.cc:245] Compilation of 7d4c2ad82e4516d5:0:0 with session name  took 36.868440912s and succeeded
I0000 00:00:1718318361.007472    6255 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7d4c2ad82e4516d5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1147010_7963914397874077682", property.function_library_fingerprint = 10577197211849872847, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718318420.474877    6251 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7a0d0d9b2811793:0:0), session_name()
I0000 00:00:1718318427.047432    6251 tpu_compile_op_common.cc:245] Compilation of 7a0d0d9b2811793:0:0 with session name  took 6.572502204s and succeeded
I0000 00:00:1718318427.114112    6251 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7a0d0d9b2811793:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1180622_15055690089236335098", property.function_library_fingerprint = 8478654637430513953, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size = 



I0000 00:00:1718318433.286360    6281 tpu_compile_op_common.cc:245] Compilation of 2d06b20b0f7e1d5f:0:0 with session name  took 6.072606636s and succeeded
I0000 00:00:1718318433.330744    6281 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2d06b20b0f7e1d5f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1180622_15055690089236335098", property.function_library_fingerprint = 8478654637430513953, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718318433.330930    6281 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7917304635047913 and accuracy: 0.8482142686843872
Epoch 1/2


I0000 00:00:1718318543.166205    6253 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9f9da55d5b053975:0:0), session_name()
I0000 00:00:1718318613.660478    6253 tpu_compile_op_common.cc:245] Compilation of 9f9da55d5b053975:0:0 with session name  took 1m10.494213379s and succeeded
I0000 00:00:1718318613.941740    6253 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9f9da55d5b053975:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1225226_3010624836618092782", property.function_library_fingerprint = 17465689503328590653, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_s

Epoch 2/2


2024-06-13 22:44:58.423051: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718318700.185487    6244 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cbd82b15118742e8:0:0), session_name()
I0000 00:00:1718318705.117590    6244 tpu_compile_op_common.cc:245] Compilation of cbd82b15118742e8:0:0 with session name  took 4.932055948s and succeeded
I0000 00:00:1718318705.155201    6244 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cbd82b15118742e8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_16266606088457332982", property.function_library_fingerprint = 2062166155153515688, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718318716.514245    6310 tpu_compile_op_common.cc:245] Compilation of a02fe6b22ef567f8:0:0 with session name  took 5.509849783s and succeeded
I0000 00:00:1718318716.551966    6310 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a02fe6b22ef567f8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1259296_12762068064258856019", property.function_library_fingerprint = 2062166155153515688, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718318716.552165    6310 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/5


I0000 00:00:1718318964.577562    6249 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(771f114889357a82:0:0), session_name()
I0000 00:00:1718319000.111250    6249 tpu_compile_op_common.cc:245] Compilation of 771f114889357a82:0:0 with session name  took 35.533624931s and succeeded
I0000 00:00:1718319000.342529    6249 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(771f114889357a82:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1387794_8108930452188632886", property.function_library_fingerprint = 12881174617336170999, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718319057.550936    6238 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc8dd0640a164d00:0:0), session_name()
I0000 00:00:1718319063.214995    6238 tpu_compile_op_common.cc:245] Compilation of cc8dd0640a164d00:0:0 with session name  took 5.66401544s and succeeded
I0000 00:00:1718319063.256754    6238 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc8dd0640a164d00:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1421468_1758556869918934133", property.function_library_fingerprint = 2474927610983372529, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718319068.558030    6280 tpu_compile_op_common.cc:245] Compilation of b26db17c1e265ce3:0:0 with session name  took 5.177794637s and succeeded
I0000 00:00:1718319068.595909    6280 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b26db17c1e265ce3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1421468_1758556869918934133", property.function_library_fingerprint = 2474927610983372529, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319068.596141    6280 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.754784882068634 and accuracy: 0.8329831957817078
Epoch 1/2


I0000 00:00:1718319173.906138    6271 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(83c5e0f448a4ec19:0:0), session_name()
I0000 00:00:1718319236.705759    6271 tpu_compile_op_common.cc:245] Compilation of 83c5e0f448a4ec19:0:0 with session name  took 1m2.799577453s and succeeded
I0000 00:00:1718319236.969351    6271 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(83c5e0f448a4ec19:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1462626_11613841367540951799", property.function_library_fingerprint = 11303280227900645073, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_s

Epoch 2/2


2024-06-13 22:55:22.243387: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718319324.147316    6271 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6433312dd72f9ba6:0:0), session_name()
I0000 00:00:1718319329.076382    6271 tpu_compile_op_common.cc:245] Compilation of 6433312dd72f9ba6:0:0 with session name  took 4.929013667s and succeeded
I0000 00:00:1718319329.107353    6271 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6433312dd72f9ba6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9636468244379078160", property.function_library_fingerprint = 4871009999916892389, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718319340.253526    6287 tpu_compile_op_common.cc:245] Compilation of bb406c25bcc97b9b:0:0 with session name  took 5.264387844s and succeeded
I0000 00:00:1718319340.291971    6287 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bb406c25bcc97b9b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1496758_7858634488163111673", property.function_library_fingerprint = 4871009999916892389, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319340.292136    6287 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/10


I0000 00:00:1718319589.060611    6309 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f8337997a9713ca6:0:0), session_name()
I0000 00:00:1718319623.444757    6309 tpu_compile_op_common.cc:245] Compilation of f8337997a9713ca6:0:0 with session name  took 34.384094166s and succeeded
I0000 00:00:1718319623.654557    6309 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f8337997a9713ca6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1625042_9355380753837751090", property.function_library_fingerprint = 5498304557486958550, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_size



I0000 00:00:1718319656.435381    6253 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d8c7c462f3c37cb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1625042_9355380753837751090", property.function_library_fingerprint = 5498304557486958550, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319656.436964    6253 tpu_compilation_cache_interface.cc:541] After adding entry for key d8c7c462f3c37cb:0:0 with session_name  cache is 56 entries (11923593730 bytes),  marked for eviction 15 entries (3346116755 bytes).
I0000 00:00:171



I0000 00:00:1718319692.607678    6239 tpu_compile_op_common.cc:245] Compilation of 4b65bc19bc470836:0:0 with session name  took 5.292352195s and succeeded
I0000 00:00:1718319692.649211    6239 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4b65bc19bc470836:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1658654_3514059833992168292", property.function_library_fingerprint = 12903859351409581814, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319692.649412    6239 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.824435293674469 and accuracy: 0.8177521228790283
Epoch 1/3


I0000 00:00:1718319814.573655    6285 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6290b9b00ceae81d:0:0), session_name()
I0000 00:00:1718319877.173344    6285 tpu_compile_op_common.cc:245] Compilation of 6290b9b00ceae81d:0:0 with session name  took 1m2.599621811s and succeeded
I0000 00:00:1718319877.444763    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6290b9b00ceae81d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1708514_8278649537254891115", property.function_library_fingerprint = 5183534858805800964, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718319940.052676    6271 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8333b1d26a027179:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1708514_8278649537254891115", property.function_library_fingerprint = 5183534858805800964, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319940.054070    6271 tpu_compilation_cache_interface.cc:541] After adding entry for key 8333b1d26a027179:0:0 with session_name  cache is 60 entries (12873970653 bytes),  marked for eviction 21 entries (4298994379 bytes).


Epoch 2/3
Epoch 3/3


2024-06-13 23:06:02.982345: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718319964.875055    6284 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5f5fa0f1f84ea40a:0:0), session_name()
I0000 00:00:1718319969.486271    6284 tpu_compile_op_common.cc:245] Compilation of 5f5fa0f1f84ea40a:0:0 with session name  took 4.611174445s and succeeded
I0000 00:00:1718319969.514749    6284 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5f5fa0f1f84ea40a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_18125206491396333760", property.function_library_fingerprint = 9651193434460649415, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718319980.356159    6287 tpu_compile_op_common.cc:245] Compilation of 4566d397f8526064:0:0 with session name  took 4.985189373s and succeeded
I0000 00:00:1718319980.392477    6287 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4566d397f8526064:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1743198_3298035244090494431", property.function_library_fingerprint = 9651193434460649415, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718319980.392696    6287 tpu_compilation_cache_interface.cc:541] After adding entry for



I0000 00:00:1718320319.114118    6285 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48daae17be9e8fe1:0:0), session_name()
I0000 00:00:1718320325.335705    6285 tpu_compile_op_common.cc:245] Compilation of 48daae17be9e8fe1:0:0 with session name  took 6.221526292s and succeeded
I0000 00:00:1718320325.395812    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48daae17be9e8fe1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1905370_10458956682498336151", property.function_library_fingerprint = 3851073529051930137, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size



I0000 00:00:1718320331.092171    6295 tpu_compile_op_common.cc:245] Compilation of 7cf7924f9fc655b0:0:0 with session name  took 5.594351545s and succeeded
I0000 00:00:1718320331.139139    6295 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7cf7924f9fc655b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1905370_10458956682498336151", property.function_library_fingerprint = 3851073529051930137, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718320331.139566    6295 tpu_compilation_cache_interface.cc:541] After adding entr

f1 score: 0.6665323376655579 and accuracy: 0.7983193397521973


[I 2024-06-13 23:12:14,061] Trial 30 finished with value: 0.7983193397521973 and parameters: {'batch_size_per_core': 32, 'num_epochs': 1, 'dropout_rate': 0.3170586446980418, 'weight_decay': 0.016129456256853105, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


I0000 00:00:1718320574.936026    6307 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b0fca083e9d2983f:0:0), session_name()
I0000 00:00:1718320607.931935    6307 tpu_compile_op_common.cc:245] Compilation of b0fca083e9d2983f:0:0 with session name  took 32.99585247s and succeeded
I0000 00:00:1718320608.095745    6307 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b0fca083e9d2983f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2035740_6604992430463816248", property.function_library_fingerprint = 1278101847366772945, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,50,;64,50,;64,50,;64,;", property.guaranteed_constants_size 



I0000 00:00:1718320666.954480    6252 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b914a1668283989:0:0), session_name()
I0000 00:00:1718320673.369520    6252 tpu_compile_op_common.cc:245] Compilation of b914a1668283989:0:0 with session name  took 6.414987577s and succeeded
I0000 00:00:1718320673.418563    6252 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b914a1668283989:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2069372_12375299481544872918", property.function_library_fingerprint = 1232761213962624579, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,48,;64,48,;64,48,;64,;", property.guaranteed_constants_size = 



I0000 00:00:1718320679.870097    6267 tpu_compile_op_common.cc:245] Compilation of 3306bf2056b42401:0:0 with session name  took 6.372542546s and succeeded
I0000 00:00:1718320679.919268    6267 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3306bf2056b42401:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2069372_12375299481544872918", property.function_library_fingerprint = 1232761213962624579, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "46,48,;46,48,;46,48,;46,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718320679.919518    6267 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.8244945406913757 and accuracy: 0.8314075469970703
Epoch 1/4


I0000 00:00:1718320807.201315    6297 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f24beb63ded3a880:0:0), session_name()
I0000 00:00:1718320860.585907    6297 tpu_compile_op_common.cc:245] Compilation of f24beb63ded3a880:0:0 with session name  took 53.384513764s and succeeded
I0000 00:00:1718320860.828731    6297 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f24beb63ded3a880:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2124508_13426703647781468561", property.function_library_fingerprint = 8146774454872777725, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,48,;64,48,;64,48,;64,;", property.guaranteed_constants_siz

Epoch 2/4
Epoch 3/4
Epoch 4/4


2024-06-13 23:22:24.730024: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718320946.571287    6229 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8dda8afb7bc8fe0c:0:0), session_name()
I0000 00:00:1718320951.979103    6229 tpu_compile_op_common.cc:245] Compilation of 8dda8afb7bc8fe0c:0:0 with session name  took 5.407768831s and succeeded
I0000 00:00:1718320952.008263    6229 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8dda8afb7bc8fe0c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9578243554201773901", property.function_library_fingerprint = 5347481324789213104, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718320963.160850    6303 tpu_compile_op_common.cc:245] Compilation of 998cc505cdcf298c:0:0 with session name  took 5.375970406s and succeeded
I0000 00:00:1718320963.210099    6303 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(998cc505cdcf298c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2159826_13597069220705689856", property.function_library_fingerprint = 5347481324789213104, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718320963.210320    6303 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/4


I0000 00:00:1718321209.483793    6274 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a2c518c7163d5cbf:0:0), session_name()
I0000 00:00:1718321244.175930    6274 tpu_compile_op_common.cc:245] Compilation of a2c518c7163d5cbf:0:0 with session name  took 34.692088742s and succeeded
I0000 00:00:1718321244.348897    6274 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a2c518c7163d5cbf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2288110_5097058883075445540", property.function_library_fingerprint = 15821679110383137046, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants



I0000 00:00:1718321303.115554    6262 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ebf1214bc944965d:0:0), session_name()
I0000 00:00:1718321309.432968    6262 tpu_compile_op_common.cc:245] Compilation of ebf1214bc944965d:0:0 with session name  took 6.31735121s and succeeded
I0000 00:00:1718321309.472840    6262 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ebf1214bc944965d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2321722_95204877120922592", property.function_library_fingerprint = 616694792102780673, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_size 



I0000 00:00:1718321315.590977    6219 tpu_compile_op_common.cc:245] Compilation of f78c71c40195c2a5:0:0 with session name  took 6.065949835s and succeeded
I0000 00:00:1718321315.639082    6219 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f78c71c40195c2a5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2321722_95204877120922592", property.function_library_fingerprint = 616694792102780673, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718321315.639356    6219 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7016034126281738 and accuracy: 0.8282563090324402


I0000 00:00:1718321418.935632    6249 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f18faa1cb97a2680:0:0), session_name()
I0000 00:00:1718321481.178893    6249 tpu_compile_op_common.cc:245] Compilation of f18faa1cb97a2680:0:0 with session name  took 1m2.243183767s and succeeded
I0000 00:00:1718321481.497004    6249 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f18faa1cb97a2680:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2361070_15752512150713044078", property.function_library_fingerprint = 8116645068236111534, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constant



2024-06-13 23:32:43.589679: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718321565.525088    6267 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(99c2138ca81c183d:0:0), session_name()
I0000 00:00:1718321571.484349    6267 tpu_compile_op_common.cc:245] Compilation of 99c2138ca81c183d:0:0 with session name  took 5.959211138s and succeeded
I0000 00:00:1718321571.522349    6267 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(99c2138ca81c183d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11175657941635721383", property.function_library_fingerprint = 17966244883290504584, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718321584.410776    6238 tpu_compile_op_common.cc:245] Compilation of 5628328236dd2428:0:0 with session name  took 5.866885134s and succeeded
I0000 00:00:1718321584.465090    6238 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5628328236dd2428:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2394526_12052985912697507058", property.function_library_fingerprint = 17966244883290504584, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718321584.465282    6238 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 1/7


I0000 00:00:1718321842.833715    6223 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(104fc713457b6582:0:0), session_name()
I0000 00:00:1718321877.698896    6223 tpu_compile_op_common.cc:245] Compilation of 104fc713457b6582:0:0 with session name  took 34.865131343s and succeeded
I0000 00:00:1718321877.924923    6223 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(104fc713457b6582:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2522786_1730155008105702272", property.function_library_fingerprint = 12693755921375104211, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718321879.972072    6241 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2a5ce4860cc5e418:0:0), session_name()
I0000 00:00:1718321914.291856    6241 tpu_compile_op_common.cc:245] Compilation of 2a5ce4860cc5e418:0:0 with session name  took 34.319738483s and succeeded




I0000 00:00:1718321914.502127    6241 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a5ce4860cc5e418:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2522786_1730155008105702272", property.function_library_fingerprint = 12693755921375104211, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718321914.502977    6241 tpu_compilation_cache_interface.cc:541] After adding entry for key 2a5ce4860cc5e418:0:0 with session_name  cache is 87 entries (18415793884 bytes),  marked for eviction 46 entries (9911916463 bytes).
I0000 00:00:



I0000 00:00:1718321951.760184    6243 tpu_compile_op_common.cc:245] Compilation of 80fb72a82fb54e28:0:0 with session name  took 5.799842291s and succeeded
I0000 00:00:1718321951.806217    6243 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(80fb72a82fb54e28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2556452_6083561859816723585", property.function_library_fingerprint = 3113707584308861830, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718321951.806391    6243 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7940270900726318 and accuracy: 0.8177521228790283


[I 2024-06-13 23:39:39,302] Trial 42 finished with value: 0.8177521228790283 and parameters: {'batch_size_per_core': 16, 'num_epochs': 7, 'dropout_rate': 0.19545041356292248, 'weight_decay': 0.04185368255283959, 'lr_scheduler_type': 'constant'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718322234.580820    6292 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a148fb5479a4fbea:0:0), session_name()
I0000 00:00:1718322268.610846    6292 tpu_compile_op_common.cc:245] Compilation of a148fb5479a4fbea:0:0 with session name  took 34.029969712s and succeeded
I0000 00:00:1718322268.806187    6292 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a148fb5479a4fbea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2697776_16028661774501077347", property.function_library_fingerprint = 17951263807029026152, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constant



I0000 00:00:1718322329.599241    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b250c9f075aaafd8:0:0), session_name()
I0000 00:00:1718322336.212968    6225 tpu_compile_op_common.cc:245] Compilation of b250c9f075aaafd8:0:0 with session name  took 6.613670801s and succeeded
I0000 00:00:1718322336.271362    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b250c9f075aaafd8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2731450_10505442023535809886", property.function_library_fingerprint = 4621837184129627120, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_



I0000 00:00:1718322342.709935    6300 tpu_compile_op_common.cc:245] Compilation of 605a322884f2fcfa:0:0 with session name  took 6.373414226s and succeeded
I0000 00:00:1718322342.765577    6300 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(605a322884f2fcfa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2731450_10505442023535809886", property.function_library_fingerprint = 4621837184129627120, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718322342.765906    6300 tpu_compilation_cache_interface.cc:541] After adding 

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.799350917339325 and accuracy: 0.8093487620353699


[I 2024-06-13 23:46:10,999] Trial 44 finished with value: 0.8093487620353699 and parameters: {'batch_size_per_core': 128, 'num_epochs': 9, 'dropout_rate': 0.207296040138977, 'weight_decay': 0.011504739336764212, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718322625.361950    6256 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(118509275c8251d9:0:0), session_name()
I0000 00:00:1718322661.686419    6256 tpu_compile_op_common.cc:245] Compilation of 118509275c8251d9:0:0 with session name  took 36.324408064s and succeeded
I0000 00:00:1718322661.929744    6256 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(118509275c8251d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2875740_15869018069618865990", property.function_library_fingerprint = 4557808838727510630, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718322664.151307    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(51444d92c7d7e3b0:0:0), session_name()
I0000 00:00:1718322696.755282    6225 tpu_compile_op_common.cc:245] Compilation of 51444d92c7d7e3b0:0:0 with session name  took 32.603917096s and succeeded




I0000 00:00:1718322696.960500    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(51444d92c7d7e3b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2875740_15869018069618865990", property.function_library_fingerprint = 4557808838727510630, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718322696.961841    6225 tpu_compilation_cache_interface.cc:541] After adding entry for key 51444d92c7d7e3b0:0:0 with session_name  cache is 95 entries (19957043833 bytes),  marked for eviction 55 entries (11659603966 bytes).
I0000 00:00



I0000 00:00:1718322735.079162    6259 tpu_compile_op_common.cc:245] Compilation of cbf442f198760644:0:0 with session name  took 5.435068543s and succeeded
I0000 00:00:1718322735.121167    6259 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cbf442f198760644:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2909406_7127545097264899756", property.function_library_fingerprint = 13926344998504737916, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718322735.121417    6259 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/3
Epoch 3/3
f1 score: 0.7324705719947815 and accuracy: 0.8413865566253662


I0000 00:00:1718322839.068418    6240 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d7884d8d31c93b0a:0:0), session_name()
I0000 00:00:1718322901.407620    6240 tpu_compile_op_common.cc:245] Compilation of d7884d8d31c93b0a:0:0 with session name  took 1m2.339147391s and succeeded
I0000 00:00:1718322901.611688    6240 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d7884d8d31c93b0a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2947086_9618087375244943996", property.function_library_fingerprint = 18333090160627435671, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_si



2024-06-13 23:56:31.594439: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718322993.604897    6253 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(747cb52685de2f8d:0:0), session_name()
I0000 00:00:1718322998.521123    6253 tpu_compile_op_common.cc:245] Compilation of 747cb52685de2f8d:0:0 with session name  took 4.916147064s and succeeded
I0000 00:00:1718322998.559338    6253 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(747cb52685de2f8d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8979649592791948641", property.function_library_fingerprint = 10400058201592758443, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718323009.854996    6281 tpu_compile_op_common.cc:245] Compilation of a81c0d147d59c7be:0:0 with session name  took 5.299729567s and succeeded
I0000 00:00:1718323009.902248    6281 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a81c0d147d59c7be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2980542_2658859506422939280", property.function_library_fingerprint = 10400058201592758443, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718323009.902437    6281 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 1/5


I0000 00:00:1718323269.508829    6228 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4a49d932ccf76686:0:0), session_name()
I0000 00:00:1718323304.235604    6228 tpu_compile_op_common.cc:245] Compilation of 4a49d932ccf76686:0:0 with session name  took 34.726715536s and succeeded
I0000 00:00:1718323304.408564    6228 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4a49d932ccf76686:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3108882_5118897312178861308", property.function_library_fingerprint = 4820361353757120790, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants_



I0000 00:00:1718323365.059540    6221 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e3a6e74bc4f61e32:0:0), session_name()
I0000 00:00:1718323371.137272    6221 tpu_compile_op_common.cc:245] Compilation of e3a6e74bc4f61e32:0:0 with session name  took 6.077690919s and succeeded
I0000 00:00:1718323371.189164    6221 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3a6e74bc4f61e32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3142510_8510621287319602737", property.function_library_fingerprint = 8485089124766563861, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_s



I0000 00:00:1718323378.218196    6234 tpu_compile_op_common.cc:245] Compilation of 78dc575d11cd745a:0:0 with session name  took 6.96955944s and succeeded
I0000 00:00:1718323378.273840    6234 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(78dc575d11cd745a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3142510_8510621287319602737", property.function_library_fingerprint = 8485089124766563861, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718323378.274079    6234 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7905819416046143 and accuracy: 0.8209033608436584


[I 2024-06-14 00:03:13,402] Trial 53 finished with value: 0.8209033608436584 and parameters: {'batch_size_per_core': 128, 'num_epochs': 5, 'dropout_rate': 0.14007373928585415, 'weight_decay': 0.037194685551905346, 'lr_scheduler_type': 'linear'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718323648.236393    6218 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ca2df3028241fddb:0:0), session_name()
I0000 00:00:1718323682.939631    6218 tpu_compile_op_common.cc:245] Compilation of ca2df3028241fddb:0:0 with session name  took 34.703170475s and succeeded
I0000 00:00:1718323683.152510    6218 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ca2df3028241fddb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3279816_14250983927580741892", property.function_library_fingerprint = 12098998411999078253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constant



I0000 00:00:1718323743.428043    6236 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2266d96ba1f360ee:0:0), session_name()
I0000 00:00:1718323750.730806    6236 tpu_compile_op_common.cc:245] Compilation of 2266d96ba1f360ee:0:0 with session name  took 7.302710358s and succeeded
I0000 00:00:1718323750.794603    6236 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2266d96ba1f360ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3313428_1865262126420155468", property.function_library_fingerprint = 6269299933304115172, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_s

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7690188884735107 and accuracy: 0.8371848464012146
Epoch 1/2


I0000 00:00:1718323868.043074    6250 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(75796cf101a709ad:0:0), session_name()
I0000 00:00:1718323928.045800    6250 tpu_compile_op_common.cc:245] Compilation of 75796cf101a709ad:0:0 with session name  took 1m0.002673684s and succeeded
I0000 00:00:1718323928.287793    6250 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75796cf101a709ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3354528_9427631761829799690", property.function_library_fingerprint = 10834306699939379680, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constant

Epoch 2/2


2024-06-14 00:13:30.345934: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718324012.294765    6227 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5ea06f3ea89acaf0:0:0), session_name()
I0000 00:00:1718324018.718876    6227 tpu_compile_op_common.cc:245] Compilation of 5ea06f3ea89acaf0:0:0 with session name  took 6.424057586s and succeeded
I0000 00:00:1718324018.752852    6227 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5ea06f3ea89acaf0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_3665930131908105251", property.function_library_fingerprint = 13179792463383722227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718324031.864024    6266 tpu_compile_op_common.cc:245] Compilation of ce4cf21ee5eb35dd:0:0 with session name  took 5.890610808s and succeeded
I0000 00:00:1718324031.906957    6266 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ce4cf21ee5eb35dd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3388598_11776674569924561175", property.function_library_fingerprint = 13179792463383722227, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718324031.907179    6266 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 1/6


I0000 00:00:1718324290.370837    6227 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9bf5501faef54bb2:0:0), session_name()
I0000 00:00:1718324326.973142    6227 tpu_compile_op_common.cc:245] Compilation of 9bf5501faef54bb2:0:0 with session name  took 36.602229609s and succeeded
I0000 00:00:1718324327.157099    6227 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9bf5501faef54bb2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3517096_13237064693671070590", property.function_library_fingerprint = 6718851265616925666, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718324386.273422    6276 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ea7921b37b29d0d9:0:0), session_name()
I0000 00:00:1718324392.455048    6276 tpu_compile_op_common.cc:245] Compilation of ea7921b37b29d0d9:0:0 with session name  took 6.181564382s and succeeded
I0000 00:00:1718324392.511225    6276 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ea7921b37b29d0d9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3550770_14577945865403263598", property.function_library_fingerprint = 12320049285355836775, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718324398.783800    6275 tpu_compile_op_common.cc:245] Compilation of b7859f4b5e0c9954:0:0 with session name  took 6.152006114s and succeeded
I0000 00:00:1718324398.831783    6275 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b7859f4b5e0c9954:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3550770_14577945865403263598", property.function_library_fingerprint = 12320049285355836775, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718324398.832103    6275 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7823933362960815 and accuracy: 0.8224790096282959


[I 2024-06-14 00:20:20,066] Trial 62 finished with value: 0.8224790096282959 and parameters: {'batch_size_per_core': 32, 'num_epochs': 6, 'dropout_rate': 0.28414913311786427, 'weight_decay': 0.013807355644798203, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718324676.325823    6303 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e19b698d1219789c:0:0), session_name()
I0000 00:00:1718324710.649536    6303 tpu_compile_op_common.cc:245] Compilation of e19b698d1219789c:0:0 with session name  took 34.323654707s and succeeded
I0000 00:00:1718324710.820476    6303 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e19b698d1219789c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3689804_10197188438915189245", property.function_library_fingerprint = 9251603040648327680, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718324713.163968    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6cebca494ec2325b:0:0), session_name()
I0000 00:00:1718324746.932870    6225 tpu_compile_op_common.cc:245] Compilation of 6cebca494ec2325b:0:0 with session name  took 33.768835716s and succeeded
I0000 00:00:1718324747.133812    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6cebca494ec2325b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3689804_10197188438915189245", property.function_library_fingerprint = 9251603040648327680, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_siz



I0000 00:00:1718324773.918811    6259 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(619cf4065666940c:0:0), session_name()
I0000 00:00:1718324779.296805    6259 tpu_compile_op_common.cc:245] Compilation of 619cf4065666940c:0:0 with session name  took 5.377902029s and succeeded
I0000 00:00:1718324779.350083    6259 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(619cf4065666940c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3723470_555775958067996960", property.function_library_fingerprint = 7726788626664616496, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size =



I0000 00:00:1718324785.333267    6305 tpu_compile_op_common.cc:245] Compilation of c7c3275ebf5ca6f0:0:0 with session name  took 5.797638201s and succeeded
I0000 00:00:1718324785.370075    6305 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c7c3275ebf5ca6f0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3723470_555775958067996960", property.function_library_fingerprint = 7726788626664616496, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718324785.370284    6305 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/3
Epoch 3/3
f1 score: 0.7457631826400757 and accuracy: 0.8287814855575562


[I 2024-06-14 00:26:36,789] Trial 66 finished with value: 0.8287814855575562 and parameters: {'batch_size_per_core': 16, 'num_epochs': 3, 'dropout_rate': 0.2166422857653264, 'weight_decay': 0.019583638900209074, 'lr_scheduler_type': 'constant'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718325053.599693    6311 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9ba87465fc46d367:0:0), session_name()
I0000 00:00:1718325092.610305    6311 tpu_compile_op_common.cc:245] Compilation of 9ba87465fc46d367:0:0 with session name  took 39.010536533s and succeeded
I0000 00:00:1718325092.844007    6311 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ba87465fc46d367:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3857356_10346641103970463485", property.function_library_fingerprint = 381364747862446137, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_size



I0000 00:00:1718325154.148883    6276 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(13b30cd11af2f636:0:0), session_name()
I0000 00:00:1718325160.384288    6276 tpu_compile_op_common.cc:245] Compilation of 13b30cd11af2f636:0:0 with session name  took 6.235354565s and succeeded
I0000 00:00:1718325160.433092    6276 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(13b30cd11af2f636:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3890968_7053881166934414340", property.function_library_fingerprint = 8362275994921553577, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718325166.689078    6259 tpu_compile_op_common.cc:245] Compilation of d2399803a6ea906a:0:0 with session name  took 6.137434801s and succeeded
I0000 00:00:1718325166.739577    6259 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2399803a6ea906a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3890968_7053881166934414340", property.function_library_fingerprint = 8362275994921553577, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718325166.739929    6259 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7609826326370239 and accuracy: 0.8350840210914612


I0000 00:00:1718325274.621338    6274 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(419b5e2e19a11e76:0:0), session_name()
I0000 00:00:1718325342.213715    6274 tpu_compile_op_common.cc:245] Compilation of 419b5e2e19a11e76:0:0 with session name  took 1m7.592261968s and succeeded
I0000 00:00:1718325342.489596    6274 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(419b5e2e19a11e76:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3930316_18337665398335070194", property.function_library_fingerprint = 10609644592828734001, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_s



2024-06-14 00:37:10.303063: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718325432.145813    6263 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b2310a8e0257cc0f:0:0), session_name()
I0000 00:00:1718325437.488865    6263 tpu_compile_op_common.cc:245] Compilation of b2310a8e0257cc0f:0:0 with session name  took 5.342993383s and succeeded
I0000 00:00:1718325437.523077    6263 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b2310a8e0257cc0f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_1753723856245539463", property.function_library_fingerprint = 12812285287522152079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718325450.430828    6224 tpu_compile_op_common.cc:245] Compilation of bd267cd2eedf063b:0:0 with session name  took 5.999706673s and succeeded
I0000 00:00:1718325450.483759    6224 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bd267cd2eedf063b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3963772_5015935433378516706", property.function_library_fingerprint = 12812285287522152079, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718325450.484055    6224 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/11


I0000 00:00:1718325708.295880    6312 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(44905fe7a2b18220:0:0), session_name()
I0000 00:00:1718325745.230286    6312 tpu_compile_op_common.cc:245] Compilation of 44905fe7a2b18220:0:0 with session name  took 36.934358923s and succeeded
I0000 00:00:1718325745.412303    6312 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(44905fe7a2b18220:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4092128_17272520433640318526", property.function_library_fingerprint = 1129572818616316427, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718325781.242777    6249 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(18ef2555396f3e0b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4092128_17272520433640318526", property.function_library_fingerprint = 1129572818616316427, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718325781.245814    6249 tpu_compilation_cache_interface.cc:541] After adding entry for key 18ef2555396f3e0b:0:0 with session_name  cache is 134 entries (27777154826 bytes),  marked for eviction 91 entries (19224843525 bytes).
I0000 00:0



I0000 00:00:1718325821.072557    6231 tpu_compile_op_common.cc:245] Compilation of 9d2db40ed6f0ad3e:0:0 with session name  took 5.911505328s and succeeded
I0000 00:00:1718325821.116479    6231 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9d2db40ed6f0ad3e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4125760_619792264506130587", property.function_library_fingerprint = 4208043398393710371, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718325821.116688    6231 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.8042591214179993 and accuracy: 0.838760495185852
Epoch 1/4


I0000 00:00:1718325949.814633    6298 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b915b96a9b975a58:0:0), session_name()
I0000 00:00:1718326019.429479    6298 tpu_compile_op_common.cc:245] Compilation of b915b96a9b975a58:0:0 with session name  took 1m9.614792765s and succeeded
I0000 00:00:1718326019.711101    6298 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b915b96a9b975a58:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4177392_517992629772763659", property.function_library_fingerprint = 17283482202378876406, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_siz

Epoch 2/4
Epoch 3/4
Epoch 4/4


2024-06-14 00:48:28.549575: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718326110.571960    6295 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(998fa7fe8e6867e0:0:0), session_name()
I0000 00:00:1718326115.438061    6295 tpu_compile_op_common.cc:245] Compilation of 998fa7fe8e6867e0:0:0 with session name  took 4.866048465s and succeeded
I0000 00:00:1718326115.475437    6295 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(998fa7fe8e6867e0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8283995605159524399", property.function_library_fingerprint = 9154297708492361211, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718326127.991849    6293 tpu_compile_op_common.cc:245] Compilation of 77a217c17946269e:0:0 with session name  took 6.147377863s and succeeded
I0000 00:00:1718326128.035426    6293 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(77a217c17946269e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4212710_11783713556819010514", property.function_library_fingerprint = 9154297708492361211, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718326128.035826    6293 tpu_compilation_cache_interface.cc:541] After adding entry fo

Epoch 1/5


I0000 00:00:1718326386.559445    6297 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(682a2a0d1dc485d8:0:0), session_name()
I0000 00:00:1718326423.058603    6297 tpu_compile_op_common.cc:245] Compilation of 682a2a0d1dc485d8:0:0 with session name  took 36.499081787s and succeeded
I0000 00:00:1718326423.244367    6297 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(682a2a0d1dc485d8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4340994_15581890642629663438", property.function_library_fingerprint = 15591875237815266983, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,50,;64,50,;64,50,;64,;", property.guaranteed_constants_si



I0000 00:00:1718326456.665900    6239 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b57f47c94637c0a4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4340994_15581890642629663438", property.function_library_fingerprint = 15591875237815266983, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718326456.667014    6239 tpu_compilation_cache_interface.cc:541] After adding entry for key b57f47c94637c0a4:0:0 with session_name  cache is 143 entries (29556356394 bytes),  marked for eviction 101 entries (20974224626 bytes).
I0000 00



I0000 00:00:1718326496.998245    6301 tpu_compile_op_common.cc:245] Compilation of fedf42562a4dc851:0:0 with session name  took 7.202912476s and succeeded
I0000 00:00:1718326497.045453    6301 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fedf42562a4dc851:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4374606_5024549583064736534", property.function_library_fingerprint = 5024581605642414012, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "46,48,;46,48,;46,48,;46,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718326497.045847    6301 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7693256139755249 and accuracy: 0.8408613204956055
Epoch 1/2


I0000 00:00:1718326605.750795    6240 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c3e01746b283c656:0:0), session_name()
I0000 00:00:1718326667.733641    6240 tpu_compile_op_common.cc:245] Compilation of c3e01746b283c656:0:0 with session name  took 1m1.982767145s and succeeded
I0000 00:00:1718326668.011850    6240 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3e01746b283c656:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4415706_7989436402935513459", property.function_library_fingerprint = 1445623445579180870, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,48,;64,48,;64,48,;64,;", property.guaranteed_constants_siz

Epoch 2/2


2024-06-14 00:59:19.728017: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718326761.667790    6295 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a060a19f1366c18f:0:0), session_name()
I0000 00:00:1718326767.759208    6295 tpu_compile_op_common.cc:245] Compilation of a060a19f1366c18f:0:0 with session name  took 6.091351473s and succeeded
I0000 00:00:1718326767.798082    6295 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a060a19f1366c18f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15636433567386653861", property.function_library_fingerprint = 2249210184729206436, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718326781.933399    6223 tpu_compile_op_common.cc:245] Compilation of a08b5b88eb122da7:0:0 with session name  took 6.29568474s and succeeded
I0000 00:00:1718326781.978673    6223 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a08b5b88eb122da7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4449776_16467280292476001881", property.function_library_fingerprint = 2249210184729206436, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,49,;24,49,;24,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718326781.978910    6223 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/20


I0000 00:00:1718327038.367513    6251 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(41c84284413d1e69:0:0), session_name()
I0000 00:00:1718327079.364368    6251 tpu_compile_op_common.cc:245] Compilation of 41c84284413d1e69:0:0 with session name  took 40.996788008s and succeeded
I0000 00:00:1718327079.585627    6251 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(41c84284413d1e69:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4578060_15924623979223608903", property.function_library_fingerprint = 14586795813152726538, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_si



I0000 00:00:1718327141.495949    6283 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d970585275d5074e:0:0), session_name()
I0000 00:00:1718327148.172570    6283 tpu_compile_op_common.cc:245] Compilation of d970585275d5074e:0:0 with session name  took 6.67656331s and succeeded
I0000 00:00:1718327148.225579    6283 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d970585275d5074e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4611672_1749087696038555239", property.function_library_fingerprint = 6521421678391268359, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718327154.586465    6239 tpu_compile_op_common.cc:245] Compilation of 854f010d1b65e60:0:0 with session name  took 6.229595803s and succeeded
I0000 00:00:1718327154.630785    6239 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(854f010d1b65e60:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4611672_1749087696038555239", property.function_library_fingerprint = 6521421678391268359, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718327154.631044    6239 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.8081512451171875 and accuracy: 0.8161764740943909


[I 2024-06-14 01:07:02,565] Trial 83 finished with value: 0.8161764740943909 and parameters: {'batch_size_per_core': 32, 'num_epochs': 20, 'dropout_rate': 0.41659198092903893, 'weight_decay': 0.021621838916133295, 'lr_scheduler_type': 'constant'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718327476.279421    6309 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(80ca96230342e0ec:0:0), session_name()
I0000 00:00:1718327516.509275    6309 tpu_compile_op_common.cc:245] Compilation of 80ca96230342e0ec:0:0 with session name  took 40.229789788s and succeeded
I0000 00:00:1718327516.729709    6309 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(80ca96230342e0ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4775258_6427508160096836063", property.function_library_fingerprint = 4189540738007782798, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_size



I0000 00:00:1718327582.291738    6233 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(96f02737834f8a2c:0:0), session_name()
I0000 00:00:1718327589.836678    6233 tpu_compile_op_common.cc:245] Compilation of 96f02737834f8a2c:0:0 with session name  took 7.544889402s and succeeded
I0000 00:00:1718327589.891695    6233 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(96f02737834f8a2c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4808870_3155488046978608419", property.function_library_fingerprint = 2549579612301503783, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718327597.074991    6248 tpu_compile_op_common.cc:245] Compilation of 493e6470a80c9b82:0:0 with session name  took 7.060216292s and succeeded
I0000 00:00:1718327597.130538    6248 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(493e6470a80c9b82:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4808870_3155488046978608419", property.function_library_fingerprint = 2549579612301503783, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718327597.130891    6248 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.8054207563400269 and accuracy: 0.8214285969734192


[I 2024-06-14 01:13:36,851] Trial 87 finished with value: 0.8214285969734192 and parameters: {'batch_size_per_core': 32, 'num_epochs': 6, 'dropout_rate': 0.2149418824934869, 'weight_decay': 0.010218835670536138, 'lr_scheduler_type': 'constant'}. Best is trial 17 with value: 0.8482142686843872.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718327869.312991    6237 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8be8fc061eeea4e5:0:0), session_name()
I0000 00:00:1718327907.666885    6237 tpu_compile_op_common.cc:245] Compilation of 8be8fc061eeea4e5:0:0 with session name  took 38.3538345s and succeeded
I0000 00:00:1718327907.856129    6237 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8be8fc061eeea4e5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4948118_2211086623227328247", property.function_library_fingerprint = 9509254388027938979, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size =



I0000 00:00:1718327910.074589    6309 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1602e69e620423fe:0:0), session_name()
I0000 00:00:1718327944.802740    6309 tpu_compile_op_common.cc:245] Compilation of 1602e69e620423fe:0:0 with session name  took 34.728066163s and succeeded
I0000 00:00:1718327944.970838    6309 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1602e69e620423fe:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4948118_2211086623227328247", property.function_library_fingerprint = 9509254388027938979, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size



I0000 00:00:1718327970.270762    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(afc04e269856de51:0:0), session_name()
I0000 00:00:1718327976.858679    6225 tpu_compile_op_common.cc:245] Compilation of afc04e269856de51:0:0 with session name  took 6.587845956s and succeeded
I0000 00:00:1718327976.903582    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(afc04e269856de51:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4981846_1360376903866472825", property.function_library_fingerprint = 7804270899591620439, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size 



I0000 00:00:1718327983.354641    6282 tpu_compile_op_common.cc:245] Compilation of 50bd0821f92b4d30:0:0 with session name  took 6.265640973s and succeeded
I0000 00:00:1718327983.395309    6282 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50bd0821f92b4d30:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4981846_1360376903866472825", property.function_library_fingerprint = 7804270899591620439, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718327983.395694    6282 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7760840654373169 and accuracy: 0.8450630307197571


I0000 00:00:1718328092.850529    6300 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2656c2bac11b021f:0:0), session_name()
I0000 00:00:1718328158.721659    6300 tpu_compile_op_common.cc:245] Compilation of 2656c2bac11b021f:0:0 with session name  took 1m5.871065722s and succeeded
I0000 00:00:1718328158.986695    6300 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2656c2bac11b021f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5021390_14605907802620370648", property.function_library_fingerprint = 14178386391304799059, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_s



2024-06-14 01:24:09.256253: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718328251.053493    6252 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(41f9ef884bd59c8e:0:0), session_name()
I0000 00:00:1718328256.094442    6252 tpu_compile_op_common.cc:245] Compilation of 41f9ef884bd59c8e:0:0 with session name  took 5.040890216s and succeeded
I0000 00:00:1718328256.126286    6252 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(41f9ef884bd59c8e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_10143042907426242953", property.function_library_fingerprint = 2793650794480758253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718328268.180245    6278 tpu_compile_op_common.cc:245] Compilation of bfef348d2461f35:0:0 with session name  took 5.397607411s and succeeded
I0000 00:00:1718328268.222868    6278 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bfef348d2461f35:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5054908_3574944769898335559", property.function_library_fingerprint = 2793650794480758253, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718328268.223081    6278 tpu_compilation_cache_interface.cc:541] After adding entry for key 

Epoch 1/3


I0000 00:00:1718328528.743126    6246 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1397cfd53c9111a:0:0), session_name()
I0000 00:00:1718328566.066131    6246 tpu_compile_op_common.cc:245] Compilation of 1397cfd53c9111a:0:0 with session name  took 37.322938454s and succeeded
I0000 00:00:1718328566.271021    6246 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1397cfd53c9111a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5183382_4754277181703842469", property.function_library_fingerprint = 13644453234032978278, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size =



I0000 00:00:1718328568.473446    6252 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(938334c2555ebd03:0:0), session_name()
I0000 00:00:1718328604.170059    6252 tpu_compile_op_common.cc:245] Compilation of 938334c2555ebd03:0:0 with session name  took 35.696545352s and succeeded




I0000 00:00:1718328604.387859    6252 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(938334c2555ebd03:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5183382_4754277181703842469", property.function_library_fingerprint = 13644453234032978278, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718328604.389892    6252 tpu_compilation_cache_interface.cc:541] After adding entry for key 938334c2555ebd03:0:0 with session_name  cache is 169 entries (34453589525 bytes),  marked for eviction 125 entries (26029636198 bytes).
I0000 00:



I0000 00:00:1718328644.808429    6234 tpu_compile_op_common.cc:245] Compilation of ce6371ab4cf88bf4:0:0 with session name  took 6.265088026s and succeeded
I0000 00:00:1718328644.859357    6234 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ce6371ab4cf88bf4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5217110_11209698959049699554", property.function_library_fingerprint = 4628589917780099028, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718328644.859611    6234 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/3
Epoch 3/3
f1 score: 0.7557907700538635 and accuracy: 0.8434873819351196


I0000 00:00:1718328749.242714    6230 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(976b58ec7e7622d1:0:0), session_name()
I0000 00:00:1718328820.531850    6230 tpu_compile_op_common.cc:245] Compilation of 976b58ec7e7622d1:0:0 with session name  took 1m11.28907746s and succeeded
I0000 00:00:1718328820.807252    6230 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(976b58ec7e7622d1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5254848_5344244194014628104", property.function_library_fingerprint = 12179682079660913287, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_si



2024-06-14 01:35:20.641110: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718328922.549685    6218 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd9ca32fc48c109:0:0), session_name()
I0000 00:00:1718328927.943022    6218 tpu_compile_op_common.cc:245] Compilation of fd9ca32fc48c109:0:0 with session name  took 5.393282016s and succeeded
I0000 00:00:1718328927.984829    6218 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd9ca32fc48c109:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_6580411310470200762", property.function_library_fingerprint = 6346814016991571851, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology



I0000 00:00:1718328940.211044    6229 tpu_compile_op_common.cc:245] Compilation of 301b7bf3ceba0a78:0:0 with session name  took 5.277090134s and succeeded
I0000 00:00:1718328940.258683    6229 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(301b7bf3ceba0a78:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5288366_176089107298194948", property.function_library_fingerprint = 6346814016991571851, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718328940.258986    6229 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 1/5


I0000 00:00:1718329198.291116    6253 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4683f2749d5c593c:0:0), session_name()
I0000 00:00:1718329234.295051    6253 tpu_compile_op_common.cc:245] Compilation of 4683f2749d5c593c:0:0 with session name  took 36.003854231s and succeeded
I0000 00:00:1718329234.513307    6253 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4683f2749d5c593c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5416840_15848194767735853935", property.function_library_fingerprint = 17829018379105150932, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_si



I0000 00:00:1718329236.696920    6257 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5c3e0cc58e5f3c0b:0:0), session_name()
I0000 00:00:1718329272.321514    6257 tpu_compile_op_common.cc:245] Compilation of 5c3e0cc58e5f3c0b:0:0 with session name  took 35.624552541s and succeeded
I0000 00:00:1718329272.487487    6257 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c3e0cc58e5f3c0b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5416840_15848194767735853935", property.function_library_fingerprint = 17829018379105150932, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_si



I0000 00:00:1718329298.485065    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(22875e3334cc61a0:0:0), session_name()
I0000 00:00:1718329304.699774    6225 tpu_compile_op_common.cc:245] Compilation of 22875e3334cc61a0:0:0 with session name  took 6.214630747s and succeeded
I0000 00:00:1718329304.753521    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22875e3334cc61a0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5450568_18346841538923595613", property.function_library_fingerprint = 3943013168392938872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size



I0000 00:00:1718329311.300190    6263 tpu_compile_op_common.cc:245] Compilation of fabbdb10ae615cca:0:0 with session name  took 6.367917385s and succeeded
I0000 00:00:1718329311.349668    6263 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fabbdb10ae615cca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5450568_18346841538923595613", property.function_library_fingerprint = 3943013168392938872, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718329311.349924    6263 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7855245471000671 and accuracy: 0.832457959651947


[I 2024-06-14 01:42:12,386] Trial 101 finished with value: 0.832457959651947 and parameters: {'batch_size_per_core': 16, 'num_epochs': 5, 'dropout_rate': 0.17399941314304893, 'weight_decay': 0.057644929808318826, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718329585.645795    6249 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(60b0a7de5aa69785:0:0), session_name()
I0000 00:00:1718329623.568427    6249 tpu_compile_op_common.cc:245] Compilation of 60b0a7de5aa69785:0:0 with session name  took 37.92256323s and succeeded
I0000 00:00:1718329623.808238    6249 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(60b0a7de5aa69785:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5588256_3361178801172724475", property.function_library_fingerprint = 14387576253786143691, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size



I0000 00:00:1718329626.009581    6299 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ae9f806ec278810b:0:0), session_name()
I0000 00:00:1718329660.878997    6299 tpu_compile_op_common.cc:245] Compilation of ae9f806ec278810b:0:0 with session name  took 34.869359211s and succeeded




I0000 00:00:1718329661.098974    6299 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ae9f806ec278810b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5588256_3361178801172724475", property.function_library_fingerprint = 14387576253786143691, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718329661.100102    6299 tpu_compilation_cache_interface.cc:541] After adding entry for key ae9f806ec278810b:0:0 with session_name  cache is 182 entries (36762085343 bytes),  marked for eviction 137 entries (28342099020 bytes).
I0000 00:



I0000 00:00:1718329701.589976    6283 tpu_compile_op_common.cc:245] Compilation of b8c7e3df68761f92:0:0 with session name  took 6.255637571s and succeeded
I0000 00:00:1718329701.638499    6283 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b8c7e3df68761f92:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5621984_10740904376187726575", property.function_library_fingerprint = 12289899873660771830, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718329701.638893    6283 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/3
Epoch 3/3
f1 score: 0.753078818321228 and accuracy: 0.8146008253097534


[I 2024-06-14 01:48:33,061] Trial 105 finished with value: 0.8146008253097534 and parameters: {'batch_size_per_core': 16, 'num_epochs': 3, 'dropout_rate': 0.20253443046705003, 'weight_decay': 0.036037053955344206, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2


I0000 00:00:1718329968.889425    6288 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(87336dae04e47a72:0:0), session_name()
I0000 00:00:1718330005.753759    6288 tpu_compile_op_common.cc:245] Compilation of 87336dae04e47a72:0:0 with session name  took 36.864284072s and succeeded
I0000 00:00:1718330005.961560    6288 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(87336dae04e47a72:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5756060_4592976811457777952", property.function_library_fingerprint = 5034162016204045503, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size



I0000 00:00:1718330008.044055    6233 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d283ec4a8a546ac6:0:0), session_name()
I0000 00:00:1718330044.086164    6233 tpu_compile_op_common.cc:245] Compilation of d283ec4a8a546ac6:0:0 with session name  took 36.042064503s and succeeded
I0000 00:00:1718330044.289829    6233 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d283ec4a8a546ac6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5756060_4592976811457777952", property.function_library_fingerprint = 5034162016204045503, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size



I0000 00:00:1718330070.900792    6295 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ea65d4da30af3c61:0:0), session_name()
I0000 00:00:1718330076.972368    6295 tpu_compile_op_common.cc:245] Compilation of ea65d4da30af3c61:0:0 with session name  took 6.071519223s and succeeded
I0000 00:00:1718330077.017554    6295 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ea65d4da30af3c61:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5789788_5727993056985546705", property.function_library_fingerprint = 3717184462388129845, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size 



I0000 00:00:1718330082.962579    6223 tpu_compile_op_common.cc:245] Compilation of 5a0df791a713ed8f:0:0 with session name  took 5.769420865s and succeeded
I0000 00:00:1718330083.006580    6223 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5a0df791a713ed8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5789788_5727993056985546705", property.function_library_fingerprint = 3717184462388129845, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718330083.006888    6223 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/2
f1 score: 0.7265232801437378 and accuracy: 0.8235294222831726


[I 2024-06-14 01:54:50,253] Trial 109 finished with value: 0.8235294222831726 and parameters: {'batch_size_per_core': 16, 'num_epochs': 2, 'dropout_rate': 0.23935701021903913, 'weight_decay': 0.032766334468114694, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718330346.372639    6233 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(471d7fc0cbd58c1b:0:0), session_name()
I0000 00:00:1718330383.392983    6233 tpu_compile_op_common.cc:245] Compilation of 471d7fc0cbd58c1b:0:0 with session name  took 37.020260616s and succeeded
I0000 00:00:1718330383.608973    6233 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(471d7fc0cbd58c1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5922058_15863906701975931652", property.function_library_fingerprint = 13978882918873331858, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_si



I0000 00:00:1718330385.726374    6229 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b5164916b3c53226:0:0), session_name()
I0000 00:00:1718330420.316042    6229 tpu_compile_op_common.cc:245] Compilation of b5164916b3c53226:0:0 with session name  took 34.589608559s and succeeded




I0000 00:00:1718330420.531596    6229 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b5164916b3c53226:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5922058_15863906701975931652", property.function_library_fingerprint = 13978882918873331858, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718330420.533702    6229 tpu_compilation_cache_interface.cc:541] After adding entry for key b5164916b3c53226:0:0 with session_name  cache is 190 entries (38118471919 bytes),  marked for eviction 143 entries (29556356394 bytes).
I0000 00



I0000 00:00:1718330458.525762    6244 tpu_compile_op_common.cc:245] Compilation of c50d7d4eca4a082f:0:0 with session name  took 6.282727534s and succeeded
I0000 00:00:1718330458.591580    6244 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c50d7d4eca4a082f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5955786_524039073469965876", property.function_library_fingerprint = 12589436040485904283, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718330458.591897    6244 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7045586109161377 and accuracy: 0.8072478771209717


[I 2024-06-14 02:01:13,976] Trial 112 finished with value: 0.8072478771209717 and parameters: {'batch_size_per_core': 16, 'num_epochs': 4, 'dropout_rate': 0.48021260579191305, 'weight_decay': 0.042732457163779006, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718330731.824010    6225 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(694749663059d9b8:0:0), session_name()
I0000 00:00:1718330769.576886    6225 tpu_compile_op_common.cc:245] Compilation of 694749663059d9b8:0:0 with session name  took 37.752779736s and succeeded
I0000 00:00:1718330769.784379    6225 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(694749663059d9b8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6091692_11447509715310933701", property.function_library_fingerprint = 17237213113125724752, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_si



I0000 00:00:1718330830.856219    6236 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f678b16de2c4e713:0:0), session_name()
I0000 00:00:1718330837.996934    6236 tpu_compile_op_common.cc:245] Compilation of f678b16de2c4e713:0:0 with session name  took 7.140658177s and succeeded
I0000 00:00:1718330838.065142    6236 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f678b16de2c4e713:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6125366_13775831913591949013", property.function_library_fingerprint = 14076954546966095036, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718330844.929224    6295 tpu_compile_op_common.cc:245] Compilation of 69680613d1799567:0:0 with session name  took 6.755993631s and succeeded
I0000 00:00:1718330844.971187    6295 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(69680613d1799567:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6125366_13775831913591949013", property.function_library_fingerprint = 14076954546966095036, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718330844.971474    6295 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.779064416885376 and accuracy: 0.8356092572212219


[I 2024-06-14 02:07:44,519] Trial 116 finished with value: 0.8356092572212219 and parameters: {'batch_size_per_core': 32, 'num_epochs': 6, 'dropout_rate': 0.288322642306206, 'weight_decay': 0.024716944767130877, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718331121.414687    6279 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(931dc6699fdefd1b:0:0), session_name()
I0000 00:00:1718331159.669243    6279 tpu_compile_op_common.cc:245] Compilation of 931dc6699fdefd1b:0:0 with session name  took 38.25450242s and succeeded
I0000 00:00:1718331159.892038    6279 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(931dc6699fdefd1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6264480_14672961064377120471", property.function_library_fingerprint = 11944142385734210568, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718331221.537959    6219 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9e66edf3be2f0f30:0:0), session_name()
I0000 00:00:1718331229.959035    6219 tpu_compile_op_common.cc:245] Compilation of 9e66edf3be2f0f30:0:0 with session name  took 8.421019494s and succeeded
I0000 00:00:1718331230.018075    6219 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9e66edf3be2f0f30:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6298108_15557784292528699371", property.function_library_fingerprint = 1698119893672546463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_size



I0000 00:00:1718331236.679515    6298 tpu_compile_op_common.cc:245] Compilation of 5992f9bd73b5eb8:0:0 with session name  took 6.549115634s and succeeded
I0000 00:00:1718331236.734238    6298 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5992f9bd73b5eb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6298108_15557784292528699371", property.function_library_fingerprint = 1698119893672546463, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718331236.734523    6298 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7722010612487793 and accuracy: 0.8198529481887817


[I 2024-06-14 02:14:13,052] Trial 120 finished with value: 0.8198529481887817 and parameters: {'batch_size_per_core': 32, 'num_epochs': 5, 'dropout_rate': 0.24754515256000936, 'weight_decay': 0.018357859187752897, 'lr_scheduler_type': 'linear'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718331509.659390    6300 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d6058f000fb74b54:0:0), session_name()
I0000 00:00:1718331547.630364    6300 tpu_compile_op_common.cc:245] Compilation of d6058f000fb74b54:0:0 with session name  took 37.970904168s and succeeded
I0000 00:00:1718331547.825208    6300 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d6058f000fb74b54:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6435628_1183320881022324201", property.function_library_fingerprint = 16018419741043612809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants



I0000 00:00:1718331611.463209    6253 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d0ffc8d4abdbe8a4:0:0), session_name()
I0000 00:00:1718331619.632481    6253 tpu_compile_op_common.cc:245] Compilation of d0ffc8d4abdbe8a4:0:0 with session name  took 8.169200953s and succeeded
I0000 00:00:1718331619.684414    6253 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d0ffc8d4abdbe8a4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6469302_12316791995350607955", property.function_library_fingerprint = 9472271635965619908, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_



I0000 00:00:1718331626.878016    6266 tpu_compile_op_common.cc:245] Compilation of a6bb75361fd3c454:0:0 with session name  took 7.12961731s and succeeded
I0000 00:00:1718331626.919952    6266 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a6bb75361fd3c454:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6469302_12316791995350607955", property.function_library_fingerprint = 9472271635965619908, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718331626.920219    6266 tpu_compilation_cache_interface.cc:541] After adding e

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7919882535934448 and accuracy: 0.8293067216873169


[I 2024-06-14 02:21:00,603] Trial 123 finished with value: 0.8293067216873169 and parameters: {'batch_size_per_core': 128, 'num_epochs': 10, 'dropout_rate': 0.25164494191236186, 'weight_decay': 0.03041122297410168, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718331916.788374    6250 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e36b227a551bfcd7:0:0), session_name()
I0000 00:00:1718331955.200049    6250 tpu_compile_op_common.cc:245] Compilation of e36b227a551bfcd7:0:0 with session name  took 38.411611195s and succeeded
I0000 00:00:1718331955.407536    6250 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e36b227a551bfcd7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6615424_3192836412759786525", property.function_library_fingerprint = 9597484421133877731, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_size



I0000 00:00:1718332016.280265    6248 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(36c7db588ce15ec1:0:0), session_name()
I0000 00:00:1718332024.224545    6248 tpu_compile_op_common.cc:245] Compilation of 36c7db588ce15ec1:0:0 with session name  took 7.944194044s and succeeded
I0000 00:00:1718332024.274141    6248 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(36c7db588ce15ec1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6649052_15147231621620883239", property.function_library_fingerprint = 13906114019332989153, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,48,;32,48,;32,48,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718332031.539175    6293 tpu_compile_op_common.cc:245] Compilation of 520d39da4c05fbdd:0:0 with session name  took 7.141619505s and succeeded
I0000 00:00:1718332031.595354    6293 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(520d39da4c05fbdd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6649052_15147231621620883239", property.function_library_fingerprint = 13906114019332989153, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718332031.595613    6293 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.809573233127594 and accuracy: 0.7972689270973206


[I 2024-06-14 02:27:36,930] Trial 127 finished with value: 0.7972689270973206 and parameters: {'batch_size_per_core': 32, 'num_epochs': 7, 'dropout_rate': 0.23104453146171736, 'weight_decay': 0.0319781122921095, 'lr_scheduler_type': 'linear'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718332312.616825    6278 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc6b2eb1430a44cf:0:0), session_name()
I0000 00:00:1718332351.413758    6278 tpu_compile_op_common.cc:245] Compilation of cc6b2eb1430a44cf:0:0 with session name  took 38.796856076s and succeeded
I0000 00:00:1718332351.601394    6278 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc6b2eb1430a44cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6789934_4204218330040612983", property.function_library_fingerprint = 5327147556062642630, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "64,50,;64,50,;64,50,;64,;", property.guaranteed_constants_size



I0000 00:00:1718332388.590905    6294 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af6d9442d7852cbc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6789934_4204218330040612983", property.function_library_fingerprint = 5327147556062642630, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718332388.591710    6294 tpu_compilation_cache_interface.cc:541] After adding entry for key af6d9442d7852cbc:0:0 with session_name  cache is 210 entries (41863128642 bytes),  marked for eviction 163 entries (33334524036 bytes).
I0000 00:0



I0000 00:00:1718332432.601575    6276 tpu_compile_op_common.cc:245] Compilation of 1ba3f154e7cd117a:0:0 with session name  took 7.943243303s and succeeded
I0000 00:00:1718332432.653894    6276 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1ba3f154e7cd117a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6823566_9132484616935748299", property.function_library_fingerprint = 16817788314027915536, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "46,48,;46,48,;46,48,;46,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718332432.654186    6276 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7476027011871338 and accuracy: 0.8356092572212219


[I 2024-06-14 02:34:11,513] Trial 131 finished with value: 0.8356092572212219 and parameters: {'batch_size_per_core': 64, 'num_epochs': 6, 'dropout_rate': 0.301336976898355, 'weight_decay': 0.05413584455356151, 'lr_scheduler_type': 'cosine'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718332712.117351    6275 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c4bc09a1b795e7cb:0:0), session_name()
I0000 00:00:1718332747.338550    6275 tpu_compile_op_common.cc:245] Compilation of c4bc09a1b795e7cb:0:0 with session name  took 35.221129402s and succeeded
I0000 00:00:1718332747.517089    6275 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c4bc09a1b795e7cb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6962814_1901060600109829656", property.function_library_fingerprint = 8731921181028961019, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size



I0000 00:00:1718332749.749202    6281 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e2a10c75b04b2f45:0:0), session_name()
I0000 00:00:1718332785.333303    6281 tpu_compile_op_common.cc:245] Compilation of e2a10c75b04b2f45:0:0 with session name  took 35.58404529s and succeeded




I0000 00:00:1718332785.537251    6281 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e2a10c75b04b2f45:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6962814_1901060600109829656", property.function_library_fingerprint = 8731921181028961019, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718332785.538274    6281 tpu_compilation_cache_interface.cc:541] After adding entry for key e2a10c75b04b2f45:0:0 with session_name  cache is 214 entries (42567970165 bytes),  marked for eviction 168 entries (34189380349 bytes).
I0000 00:0



I0000 00:00:1718332829.376451    6306 tpu_compile_op_common.cc:245] Compilation of e1de2e70caf24352:0:0 with session name  took 6.947635676s and succeeded
I0000 00:00:1718332829.432489    6306 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e1de2e70caf24352:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6996542_17060061180811052628", property.function_library_fingerprint = 11549082818818874729, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718332829.432849    6306 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/3
Epoch 3/3
f1 score: 0.7625232934951782 and accuracy: 0.8398109078407288


[I 2024-06-14 02:40:40,999] Trial 135 finished with value: 0.8398109078407288 and parameters: {'batch_size_per_core': 16, 'num_epochs': 3, 'dropout_rate': 0.21104442910317037, 'weight_decay': 0.023768743762664097, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718333098.951465    6262 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(56b56775a3920da2:0:0), session_name()
I0000 00:00:1718333136.945748    6262 tpu_compile_op_common.cc:245] Compilation of 56b56775a3920da2:0:0 with session name  took 37.994229493s and succeeded
I0000 00:00:1718333137.157297    6262 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(56b56775a3920da2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7130618_2820000750967549095", property.function_library_fingerprint = 3511748806720239540, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size



I0000 00:00:1718333139.365498    6257 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3492d27f61d9c515:0:0), session_name()
I0000 00:00:1718333176.002616    6257 tpu_compile_op_common.cc:245] Compilation of 3492d27f61d9c515:0:0 with session name  took 36.637060346s and succeeded
I0000 00:00:1718333176.196419    6257 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3492d27f61d9c515:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7130618_2820000750967549095", property.function_library_fingerprint = 3511748806720239540, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size



I0000 00:00:1718333202.545205    6235 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(da290b1de81a045c:0:0), session_name()
I0000 00:00:1718333209.237058    6235 tpu_compile_op_common.cc:245] Compilation of da290b1de81a045c:0:0 with session name  took 6.691800935s and succeeded
I0000 00:00:1718333209.301198    6235 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(da290b1de81a045c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7164346_5704918248887299234", property.function_library_fingerprint = 3513074581099106476, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size 



I0000 00:00:1718333216.778922    6230 tpu_compile_op_common.cc:245] Compilation of 37d15950961f352:0:0 with session name  took 7.296859876s and succeeded
I0000 00:00:1718333216.827385    6230 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(37d15950961f352:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7164346_5704918248887299234", property.function_library_fingerprint = 3513074581099106476, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718333216.827681    6230 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/3
Epoch 3/3
f1 score: 0.747229278087616 and accuracy: 0.838760495185852


[I 2024-06-14 02:47:08,017] Trial 139 finished with value: 0.838760495185852 and parameters: {'batch_size_per_core': 16, 'num_epochs': 3, 'dropout_rate': 0.2341956637275761, 'weight_decay': 0.04830851895406228, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718333482.839351    6227 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2fe30467be6be5ff:0:0), session_name()
I0000 00:00:1718333518.920061    6227 tpu_compile_op_common.cc:245] Compilation of 2fe30467be6be5ff:0:0 with session name  took 36.080632104s and succeeded
I0000 00:00:1718333519.106206    6227 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2fe30467be6be5ff:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7298422_8399272973867061715", property.function_library_fingerprint = 678049911879271557, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_size 



I0000 00:00:1718333521.274717    6237 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(868814a6de168622:0:0), session_name()
I0000 00:00:1718333556.064639    6237 tpu_compile_op_common.cc:245] Compilation of 868814a6de168622:0:0 with session name  took 34.789867341s and succeeded
I0000 00:00:1718333556.228234    6237 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(868814a6de168622:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7298422_8399272973867061715", property.function_library_fingerprint = 678049911879271557, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size 



I0000 00:00:1718333582.777814    6296 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(503a5faed7cde769:0:0), session_name()
I0000 00:00:1718333589.398500    6296 tpu_compile_op_common.cc:245] Compilation of 503a5faed7cde769:0:0 with session name  took 6.620640224s and succeeded
I0000 00:00:1718333589.446344    6296 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(503a5faed7cde769:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7332150_7664931099040333633", property.function_library_fingerprint = 16496647356534233010, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size



I0000 00:00:1718333596.418675    6226 tpu_compile_op_common.cc:245] Compilation of 22e2e97d7a65dc3e:0:0 with session name  took 6.76535763s and succeeded
I0000 00:00:1718333596.475942    6226 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22e2e97d7a65dc3e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7332150_7664931099040333633", property.function_library_fingerprint = 16496647356534233010, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718333596.476150    6226 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7880639433860779 and accuracy: 0.8366596698760986


[I 2024-06-14 02:53:36,488] Trial 142 finished with value: 0.8366596698760986 and parameters: {'batch_size_per_core': 16, 'num_epochs': 5, 'dropout_rate': 0.198038099797847, 'weight_decay': 0.04107374409835776, 'lr_scheduler_type': 'cosine_with_restarts'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718333875.978214    6279 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aeade42249480111:0:0), session_name()
I0000 00:00:1718333913.279784    6279 tpu_compile_op_common.cc:245] Compilation of aeade42249480111:0:0 with session name  took 37.301514788s and succeeded
I0000 00:00:1718333913.509377    6279 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aeade42249480111:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7469838_16763306160911135377", property.function_library_fingerprint = 10537485733750885603, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_si



I0000 00:00:1718333915.623876    6281 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(390672bb747a8420:0:0), session_name()
I0000 00:00:1718333954.969484    6281 tpu_compile_op_common.cc:245] Compilation of 390672bb747a8420:0:0 with session name  took 39.345541651s and succeeded
I0000 00:00:1718333955.164048    6281 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(390672bb747a8420:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7469838_16763306160911135377", property.function_library_fingerprint = 10537485733750885603, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_si



I0000 00:00:1718333983.693107    6271 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d5ab08e0f743aa75:0:0), session_name()
I0000 00:00:1718333990.284034    6271 tpu_compile_op_common.cc:245] Compilation of d5ab08e0f743aa75:0:0 with session name  took 6.590848033s and succeeded
I0000 00:00:1718333990.332838    6271 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d5ab08e0f743aa75:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7503566_10769039174582823972", property.function_library_fingerprint = 12460233864660865280, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718333998.124329    6279 tpu_compile_op_common.cc:245] Compilation of 10cddb7053d08d8b:0:0 with session name  took 7.563674637s and succeeded
I0000 00:00:1718333998.169939    6279 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(10cddb7053d08d8b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7503566_10769039174582823972", property.function_library_fingerprint = 12460233864660865280, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718333998.170214    6279 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7740082144737244 and accuracy: 0.841911792755127


I0000 00:00:1718334105.519543    6222 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(50f24d83870eacaa:0:0), session_name()
I0000 00:00:1718334175.865212    6222 tpu_compile_op_common.cc:245] Compilation of 50f24d83870eacaa:0:0 with session name  took 1m10.34562027s and succeeded
I0000 00:00:1718334176.137473    6222 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50f24d83870eacaa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7543110_13648501103024265394", property.function_library_fingerprint = 289212441804904162, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_siz



2024-06-14 03:04:28.862861: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718334270.800189    6313 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(be5dffd8d8f4e0bd:0:0), session_name()
I0000 00:00:1718334276.036384    6313 tpu_compile_op_common.cc:245] Compilation of be5dffd8d8f4e0bd:0:0 with session name  took 5.236144193s and succeeded
I0000 00:00:1718334276.066410    6313 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(be5dffd8d8f4e0bd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11716563334334850945", property.function_library_fingerprint = 14990571088685478980, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718334290.314169    6306 tpu_compile_op_common.cc:245] Compilation of 2035064e31160f3c:0:0 with session name  took 6.164639086s and succeeded
I0000 00:00:1718334290.377329    6306 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2035064e31160f3c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7576628_249055851980511722", property.function_library_fingerprint = 14990571088685478980, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718334290.377590    6306 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 1/5


I0000 00:00:1718334548.747390    6229 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5787ebfe2778fa48:0:0), session_name()
I0000 00:00:1718334590.400696    6229 tpu_compile_op_common.cc:245] Compilation of 5787ebfe2778fa48:0:0 with session name  took 41.653233614s and succeeded
I0000 00:00:1718334590.602268    6229 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5787ebfe2778fa48:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7704912_2218250417388591786", property.function_library_fingerprint = 12602877821113237758, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,50,;32,50,;32,50,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718334630.416542    6220 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6d17c1c7e90df3c8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7704912_2218250417388591786", property.function_library_fingerprint = 12602877821113237758, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718334630.418075    6220 tpu_compilation_cache_interface.cc:541] After adding entry for key 6d17c1c7e90df3c8:0:0 with session_name  cache is 235 entries (46258088848 bytes),  marked for eviction 189 entries (37854261119 bytes).
I0000 00:



I0000 00:00:1718334672.221239    6251 tpu_compile_op_common.cc:245] Compilation of a10b3beaec9b9557:0:0 with session name  took 6.998234528s and succeeded
I0000 00:00:1718334672.278502    6251 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a10b3beaec9b9557:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7738524_14132299815957599416", property.function_library_fingerprint = 16317291229401539946, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718334672.278960    6251 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7876091599464417 and accuracy: 0.8403361439704895


[I 2024-06-14 03:11:28,846] Trial 151 finished with value: 0.8403361439704895 and parameters: {'batch_size_per_core': 32, 'num_epochs': 5, 'dropout_rate': 0.2438992535078157, 'weight_decay': 0.08141142708635488, 'lr_scheduler_type': 'constant'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718334947.618392    6279 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ceb299a8b6fcaba7:0:0), session_name()
I0000 00:00:1718334984.699353    6279 tpu_compile_op_common.cc:245] Compilation of ceb299a8b6fcaba7:0:0 with session name  took 37.080905756s and succeeded
I0000 00:00:1718334984.888923    6279 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ceb299a8b6fcaba7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7875806_4924483250697454806", property.function_library_fingerprint = 15248917175007320965, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718334987.019706    6237 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(29f0a7b0970e6f2:0:0), session_name()
I0000 00:00:1718335022.889554    6237 tpu_compile_op_common.cc:245] Compilation of 29f0a7b0970e6f2:0:0 with session name  took 35.869782895s and succeeded




I0000 00:00:1718335023.095416    6237 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29f0a7b0970e6f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7875806_4924483250697454806", property.function_library_fingerprint = 15248917175007320965, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718335023.097510    6237 tpu_compilation_cache_interface.cc:541] After adding entry for key 29f0a7b0970e6f2:0:0 with session_name  cache is 239 entries (46947363549 bytes),  marked for eviction 193 entries (38558158717 bytes).
I0000 00:00



I0000 00:00:1718335062.567898    6222 tpu_compile_op_common.cc:245] Compilation of 5c0355965c7cbebd:0:0 with session name  took 6.781653367s and succeeded
I0000 00:00:1718335062.613069    6222 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c0355965c7cbebd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7909472_16183641457232429747", property.function_library_fingerprint = 7654369142008725312, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718335062.613297    6222 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.791064977645874 and accuracy: 0.8413865566253662


[I 2024-06-14 03:18:08,627] Trial 155 finished with value: 0.8413865566253662 and parameters: {'batch_size_per_core': 16, 'num_epochs': 6, 'dropout_rate': 0.21755234355877032, 'weight_decay': 0.09881619088409187, 'lr_scheduler_type': 'constant'}. Best is trial 90 with value: 0.848739504814148.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718335345.793458    6284 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5b235e5302f93b6d:0:0), session_name()
I0000 00:00:1718335383.697366    6284 tpu_compile_op_common.cc:245] Compilation of 5b235e5302f93b6d:0:0 with session name  took 37.903846768s and succeeded
I0000 00:00:1718335383.908440    6284 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5b235e5302f93b6d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8048752_13007625493901783387", property.function_library_fingerprint = 5621519527391072956, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,50,;16,50,;16,50,;16,;", property.guaranteed_constants_siz



I0000 00:00:1718335386.050898    6246 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(921cc626bc62cc74:0:0), session_name()
I0000 00:00:1718335420.778257    6246 tpu_compile_op_common.cc:245] Compilation of 921cc626bc62cc74:0:0 with session name  took 34.727285215s and succeeded
I0000 00:00:1718335420.984246    6246 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(921cc626bc62cc74:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8048752_13007625493901783387", property.function_library_fingerprint = 5621519527391072956, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,50,;10,50,;10,50,;10,;", property.guaranteed_constants_siz



I0000 00:00:1718335447.198882    6226 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(be03e798d6138294:0:0), session_name()
I0000 00:00:1718335453.705917    6226 tpu_compile_op_common.cc:245] Compilation of be03e798d6138294:0:0 with session name  took 6.506987194s and succeeded
I0000 00:00:1718335453.758551    6226 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(be03e798d6138294:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8082418_9816530736297092460", property.function_library_fingerprint = 12368224290936770705, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_size



I0000 00:00:1718335460.352776    6285 tpu_compile_op_common.cc:245] Compilation of 644214a3d6ea79ec:0:0 with session name  took 6.428077312s and succeeded
I0000 00:00:1718335460.408906    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(644214a3d6ea79ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8082418_9816530736297092460", property.function_library_fingerprint = 12368224290936770705, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,48,;14,48,;14,48,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718335460.409261    6285 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.8006982207298279 and accuracy: 0.8518907427787781
Epoch 1/3


I0000 00:00:1718335587.439975    6289 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(64db01b384cfdd32:0:0), session_name()
I0000 00:00:1718335659.226699    6289 tpu_compile_op_common.cc:245] Compilation of 64db01b384cfdd32:0:0 with session name  took 1m11.786644858s and succeeded
I0000 00:00:1718335659.502515    6289 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(64db01b384cfdd32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8129128_2304750596175917792", property.function_library_fingerprint = 8981639724775744604, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "16,48,;16,48,;16,48,;16,;", property.guaranteed_constants_si

Epoch 2/3
Epoch 3/3


2024-06-14 03:29:14.308377: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718335756.226691    6247 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c9151676fc708e7:0:0), session_name()
I0000 00:00:1718335761.670306    6247 tpu_compile_op_common.cc:245] Compilation of c9151676fc708e7:0:0 with session name  took 5.44354136s and succeeded
I0000 00:00:1718335761.703193    6247 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c9151676fc708e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14915021476359529791", property.function_library_fingerprint = 15552066253688612286, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog



I0000 00:00:1718335774.581582    6297 tpu_compile_op_common.cc:245] Compilation of a8f9e13b100e8445:0:0 with session name  took 5.839805245s and succeeded
I0000 00:00:1718335774.631554    6297 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a8f9e13b100e8445:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8163812_17957081522470553413", property.function_library_fingerprint = 15552066253688612286, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "8,49,;8,49,;8,49,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718335774.632038    6297 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 1/8


I0000 00:00:1718336033.891399    6285 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(66f8f24378e1d357:0:0), session_name()
I0000 00:00:1718336072.071058    6285 tpu_compile_op_common.cc:245] Compilation of 66f8f24378e1d357:0:0 with session name  took 38.179603471s and succeeded
I0000 00:00:1718336072.279947    6285 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(66f8f24378e1d357:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8292096_13387169366636382487", property.function_library_fingerprint = 2000322157258670509, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,50,;128,50,;128,50,;128,;", property.guaranteed_constants



I0000 00:00:1718336138.084435    6286 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cc64e6b59b0894ee:0:0), session_name()
I0000 00:00:1718336146.210819    6286 tpu_compile_op_common.cc:245] Compilation of cc64e6b59b0894ee:0:0 with session name  took 8.126340481s and succeeded
I0000 00:00:1718336146.263985    6286 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc64e6b59b0894ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8325708_13295592273937837661", property.function_library_fingerprint = 4595283087621402415, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "128,48,;128,48,;128,48,;128,;", property.guaranteed_constants_



I0000 00:00:1718336153.111371    6260 tpu_compile_op_common.cc:245] Compilation of ac4b6ff809e50926:0:0 with session name  took 6.796315289s and succeeded
I0000 00:00:1718336153.157899    6260 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ac4b6ff809e50926:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8325708_13295592273937837661", property.function_library_fingerprint = 4595283087621402415, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "110,48,;110,48,;110,48,;110,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718336153.158106    6260 tpu_compilation_cache_interface.cc:541] After adding 

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7973519563674927 and accuracy: 0.8135504126548767


[I 2024-06-14 03:36:17,647] Trial 163 finished with value: 0.8135504126548767 and parameters: {'batch_size_per_core': 128, 'num_epochs': 8, 'dropout_rate': 0.26090046072723455, 'weight_decay': 0.01768566413140883, 'lr_scheduler_type': 'constant'}. Best is trial 158 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


[W 2024-06-14 03:38:23,755] Trial 167 failed with parameters: {'batch_size_per_core': 16, 'num_epochs': 7, 'dropout_rate': 0.28496301616932757, 'weight_decay': 0.096026311358755, 'lr_scheduler_type': 'constant'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_5476/3085508420.py", line 228, in objective
    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)
  File "/usr/local/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 1229, in fit
    return super().fit(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 1804, in fit
    tmp_logs = self.trai