# This notebook attempts to cut down the TPU Cache compiler time on the first epoch since it takes a while

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nlp-getting-started/sample_submission.csv
/kaggle/input/nlp-getting-started/train.csv
/kaggle/input/nlp-getting-started/test.csv
/kaggle/input/certification/BaltimoreCyberTrustRoot.crt.pem


In [None]:
import numpy as np
import pandas as pd
import random
import os
import re
import json
from transformers import set_seed, BertTokenizer, TFBertForSequenceClassification, BertConfig
import tensorflow as tf
%pip install optuna
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
set_seed(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Install necessary packages for Azure SQL connection
%pip install mysql-connector-python 
%pip install PyMySQL

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress other warnings (commented out for now)
# warnings.filterwarnings('ignore')

# Suppress absl TPU cache logging
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Additional configuration to suppress specific TPU cache logs
tf.autograph.set_verbosity(3)
tf.get_logger().propagate = False

# Load the training data
train_data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')
kaggle_test_data = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Split the data into 75% training and 25% validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42, stratify=train_data['target'])

# Clean the text data
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)     # Remove mentions
    text = re.sub(r'\d+', '', text)      # Remove numbers
    text = re.sub(r'[^\w\s#]', '', text)  # Remove punctuation except hashtags
    text = text.lower()                  # Convert to lowercase
    return text

train_data['clean_text'] = train_data['text'].apply(clean_text)
val_data['clean_text'] = val_data['text'].apply(clean_text)
kaggle_test_data['clean_text'] = kaggle_test_data['text'].apply(clean_text)

# Tokenize the text data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=64,
        padding=True,
        truncation=True,
        return_tensors='tf'
    )

# Encode the clean text data
train_encodings = tokenize_texts(train_data['clean_text'])
val_encodings = tokenize_texts(val_data['clean_text'])
kaggle_test_encodings = tokenize_texts(kaggle_test_data['clean_text'])

train_labels = tf.convert_to_tensor(train_data['target'].values)
val_labels = tf.convert_to_tensor(val_data['target'].values)

def compute_metrics(predictions, labels):
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(labels, predictions)
    accuracy = accuracy_score(labels, predictions)
    return {'f1': f1, 'accuracy': accuracy}

def create_tf_dataset(encodings, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((encodings, labels))
    dataset = dataset.cache()  # Cache the dataset
    dataset = dataset.shuffle(10000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)  # Prefetching
    return dataset

# Initialize TPU
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    tpu_cores = strategy.num_replicas_in_sync
    print(f"TPU cores available: {tpu_cores}")
except ValueError:
    print("TPU not found")
    raise SystemExit

# Directory to save models
model_save_dir = './saved_models'
os.makedirs(model_save_dir, exist_ok=True)

# File to store top 5 model predictions
top_predictions_file = 'top_5_predictions.json'

# Load existing top 5 predictions
if os.path.exists(top_predictions_file):
    with open(top_predictions_file, 'r') as file:
        top_predictions = json.load(file)
else:
    top_predictions = []

# Function to save top predictions
def save_top_predictions(predictions_file, val_accuracy, model_number):
    global top_predictions
    
    new_entry = {
        'model_number': model_number,
        'val_accuracy': val_accuracy,
        'predictions_file': predictions_file
    }
    
    # Add the new entry and sort by validation accuracy
    top_predictions.append(new_entry)
    top_predictions = sorted(top_predictions, key=lambda x: x['val_accuracy'], reverse=True)
    
    # If there are more than 5 entries, remove the one with the lowest accuracy
    if len(top_predictions) > 5:
        removed_entry = top_predictions.pop()
        # Check if the file exists before attempting to remove it
        if os.path.exists(removed_entry['predictions_file']):
            os.remove(removed_entry['predictions_file'])
            print(f"File {removed_entry['predictions_file']} has been removed.")
        else:
            print(f"File {removed_entry['predictions_file']} does not exist and cannot be removed.")
    
    # Save the updated top predictions to file
    with open(top_predictions_file, 'w') as file:
        json.dump(top_predictions, file, indent=4)

# Set fixed batch size and learning rate parameters
base_learning_rate = 1e-5
batch_size_per_core = 32
tpu_cores = 8
batch_size = batch_size_per_core * tpu_cores
learning_rate = base_learning_rate * (batch_size / (batch_size_per_core * tpu_cores))

# Create the datasets outside the objective function
train_dataset = create_tf_dataset(dict(train_encodings), train_labels, batch_size)
val_dataset = create_tf_dataset(dict(val_encodings), val_labels, batch_size)
kaggle_test_dataset = tf.data.Dataset.from_tensor_slices(dict(kaggle_test_encodings)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

fine_tune_encodings = tokenize_texts(val_data['clean_text'])
fine_tune_labels = tf.convert_to_tensor(val_data['target'].values)
fine_tune_dataset = tf.data.Dataset.from_tensor_slices((
    dict(fine_tune_encodings),
    fine_tune_labels
)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

# Track top 5 models
def objective(trial):
    num_epochs = trial.suggest_int("num_epochs", 1, 20)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    lr_scheduler_type = trial.suggest_categorical("lr_scheduler_type", ["constant", "linear", "cosine", "cosine_with_restarts"])
    gradient_clip_norm = trial.suggest_float("gradient_clip_norm", 0.0, 1.0)

    with strategy.scope():
        precision = tf.keras.metrics.Precision()
        recall = tf.keras.metrics.Recall()

        def f1_score_custom(y_true, y_pred):
            # Convert logits to predicted labels
            y_pred = tf.argmax(y_pred, axis=1)
            
            # Ensure true labels are in integer format
            y_true = tf.cast(y_true, tf.int64)
            
            # Update the state of precision and recall
            precision.update_state(y_true, y_pred)
            recall.update_state(y_true, y_pred)
            
            # Compute precision and recall values
            precision_result = precision.result()
            recall_result = recall.result()
            
            # Compute F1 score
            f1 = 2 * ((precision_result * recall_result) / (precision_result + recall_result + tf.keras.backend.epsilon()))
            
            return f1

        config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2, hidden_dropout_prob=dropout_rate)
        model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

        if lr_scheduler_type == "linear":
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000,
                end_learning_rate=0.0,
                power=1.0
            )
        elif lr_scheduler_type == "cosine":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000
            )
        elif lr_scheduler_type == "cosine_with_restarts":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                initial_learning_rate=learning_rate,
                first_decay_steps=1000
            )
        else:
            lr_schedule = learning_rate

        optimizer = tf.keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            epsilon=1e-8,
            clipnorm=gradient_clip_norm
        )

        model.compile(
            optimizer=optimizer, 
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy'), f1_score_custom],
            steps_per_execution=32  # Experiment with different values like 16, 32, 64
        )

    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)

    # Evaluate on validation set
    val_loss, val_accuracy, val_f1_score = model.evaluate(val_dataset, verbose=1)
    print(f"f1 score: {val_f1_score} and accuracy: {val_accuracy}")
    
    avg_score = (val_accuracy + val_f1_score) / 2

    # If the model is in the top 5, proceed with fine-tuning and saving predictions
    if len(top_predictions) < 5 or val_accuracy > min(top_predictions, key=lambda x: x['val_accuracy'])['val_accuracy']:
        # Fine-tuning step
        # Calculate the ratio of training data size to epochs
        training_data_size = len(train_data)
        fine_tune_data_size = len(val_data)
        fine_tune_epochs = max(1, round((fine_tune_data_size / training_data_size) * num_epochs))

        model.fit(fine_tune_dataset, epochs=fine_tune_epochs, verbose=1)
        
        # Make predictions on the Kaggle test dataset
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()

        # Save the predictions
        predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}" + '_submission.csv') 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(predictions_file, index=False)
    
        # Save the predictions and accuracy to the top 5 list
        save_top_predictions(predictions_file, val_accuracy, trial.number)

    return val_accuracy

# Define your Optuna study, using the MySQL connection string
optuna_storage = 'mysql+pymysql://<username>:<password>@<host>/<database>?ssl_ca=<path_to_CA_cert>&ssl_verify_cert=true'

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
db_password = user_secrets.get_secret("DB_PASSWORD")# This uses the secrets inside of Kaggle so I don't have to explicitly type my password out in code

# Example with your details (replace '<password>' with your real password and '<database>' with your database name)
optuna_storage = f'mysql+pymysql://MichaelAzure:{db_password}@kaggle-third-sql.mysql.database.azure.com/kaggle_disaster_database?ssl_ca=/kaggle/input/certification&ssl_verify_cert=true'

studyName = 'disaster_FastTPU_0'
study = optuna.create_study(study_name=studyName, # name of the study
                            storage=optuna_storage,  # URL for the mySQL schema
                            direction='maximize', # maximize the log loss
                            load_if_exists=True, # makes it so that if the study_name already exists in the schema, then it will append the new trials with the old trials and essentially resume the study. It will also remember the previous trials so it really is resuming the study
                            )

study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save top predictions JSON file to output directory for later use
output_top_predictions_file = '/kaggle/working/top_5_predictions.json'
with open(output_top_predictions_file, 'w') as file:
    json.dump(top_predictions, file, indent=4)

print(f"Top 5 predictions saved to {output_top_predictions_file}")


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1718379875.602275    6097 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="localhost"
=== Source Location Trace: === 
learning/45eac/tfrc/runtime/common_lib.cc:479
D0614 15:44:35.610605707    6097 config.cc:196]                        gRPC EXPERIMENT call_status_override_on_cancellation   OFF (default:OFF)
D0614 15:44:35.610620233    6097 config.cc:196]                        gRPC EXPERIMENT call_v3                                OFF (default:OFF)
D0614 15:44:35.610623590    6097 config.cc:196]                        gRPC EXPERIMENT canary_client_privacy                  ON  (default:ON)
D0614 15:44:35.610625984    6097 config.cc:196]                        gRPC EXPERIMENT capture_base_context                   ON  (default:ON)
D0614 15:44:35.610628365    6097 config.cc:196]                        gRPC EXPER

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


I0000 00:00:1718379894.833770    6097 service.cc:145] XLA service 0x58f2fed6e280 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1718379894.833824    6097 service.cc:153]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1718379894.833829    6097 service.cc:153]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1718379894.833832    6097 service.cc:153]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1718379894.833835    6097 service.cc:153]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1718379894.833853    6097 service.cc:153]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1718379894.833856    6097 service.cc:153]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1718379894.833859    6097 service.cc:153]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1718379894.833870    6097 service.cc:153]   StreamExecutor device (7): TPU, 2a886c8


TPU cores available: 8


[I 2024-06-14 15:45:03,746] Using an existing study with name 'disaster_FastTPU_0' instead of creating a new one.
I0000 00:00:1718379905.736328    6097 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718380174.660817    6929 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(92519387be42b99d:0:0), session_name()
I0000 00:00:1718380217.858483    6929 tpu_compile_op_common.cc:245] Compilation of 92519387be42b99d:0:0 with session name  took 43.197598497s and succeeded
I0000 00:00:1718380218.092509    6929 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(92519387be42b99d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_133925_8244120198377918538", property.function_library_fingerprint = 5955417321330918499, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718380259.217862    6916 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(75a2eae36ac56c65:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_133925_8244120198377918538", property.function_library_fingerprint = 5955417321330918499, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718380259.218855    6916 tpu_compilation_cache_interface.cc:541] After adding entry for key 75a2eae36ac56c65:0:0 with session_name  cache is 2 entries (614682503 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718380284.3672



I0000 00:00:1718380295.632401    6909 tpu_compile_op_common.cc:245] Compilation of 5838ab8cf7f0de06:0:0 with session name  took 5.021742814s and succeeded
I0000 00:00:1718380295.680177    6909 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5838ab8cf7f0de06:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_170970_9397246341361593331", property.function_library_fingerprint = 6217386303393656189, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718380295.680384    6909 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/3
Epoch 3/3
f1 score: 0.7088697552680969 and accuracy: 0.8403361439704895


I0000 00:00:1718380396.562049    6874 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4335b978347f4b97:0:0), session_name()
I0000 00:00:1718380468.880021    6874 tpu_compile_op_common.cc:245] Compilation of 4335b978347f4b97:0:0 with session name  took 1m12.31792749s and succeeded
I0000 00:00:1718380469.155991    6874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4335b978347f4b97:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_211989_9716065281814031629", property.function_library_fingerprint = 8740341444743327166, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



2024-06-14 15:55:57.799346: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718380559.553583    6840 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c77245f98184050c:0:0), session_name()
I0000 00:00:1718380564.459602    6840 tpu_compile_op_common.cc:245] Compilation of c77245f98184050c:0:0 with session name  took 4.905975499s and succeeded
I0000 00:00:1718380564.486693    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c77245f98184050c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_3051610315426836343", property.function_library_fingerprint = 12160291117787565098, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718380575.684721    6876 tpu_compile_op_common.cc:245] Compilation of 505d478cef26b137:0:0 with session name  took 5.296851798s and succeeded
I0000 00:00:1718380575.719970    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(505d478cef26b137:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_248878_2820792170431181724", property.function_library_fingerprint = 12160291117787565098, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718380575.720173    6876 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/6


I0000 00:00:1718380829.153263    6871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c26cd95ca814e7ae:0:0), session_name()
I0000 00:00:1718380871.491972    6871 tpu_compile_op_common.cc:245] Compilation of c26cd95ca814e7ae:0:0 with session name  took 42.338659386s and succeeded
I0000 00:00:1718380871.704694    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c26cd95ca814e7ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_387443_1749959567204974298", property.function_library_fingerprint = 9429769849672244352, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718380912.347148    6915 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7d2d774435ab93b4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_387443_1749959567204974298", property.function_library_fingerprint = 9429769849672244352, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718380912.347654    6915 tpu_compilation_cache_interface.cc:541] After adding entry for key 7d2d774435ab93b4:0:0 with session_name  cache is 11 entries (2437001411 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718380938.21



I0000 00:00:1718380949.692395    6924 tpu_compile_op_common.cc:245] Compilation of 917ab7c8f129f4a6:0:0 with session name  took 4.931635101s and succeeded
I0000 00:00:1718380949.725209    6924 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(917ab7c8f129f4a6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_424492_7202421432314579158", property.function_library_fingerprint = 15929102900825985688, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718380949.725426    6924 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7050926089286804 and accuracy: 0.8497899174690247
Epoch 1/2


I0000 00:00:1718381061.342680    6877 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1e6342242eff864b:0:0), session_name()
I0000 00:00:1718381133.427737    6877 tpu_compile_op_common.cc:245] Compilation of 1e6342242eff864b:0:0 with session name  took 1m12.084998916s and succeeded
I0000 00:00:1718381133.706678    6877 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1e6342242eff864b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_470771_10157791545026193752", property.function_library_fingerprint = 5503757167217059368, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/2


2024-06-14 16:07:07.371630: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718381229.164694    6842 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d338e62741d611b7:0:0), session_name()
I0000 00:00:1718381234.536570    6842 tpu_compile_op_common.cc:245] Compilation of d338e62741d611b7:0:0 with session name  took 5.371809686s and succeeded
I0000 00:00:1718381234.568113    6842 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d338e62741d611b7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13573834009204767100", property.function_library_fingerprint = 15057106835495325392, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718381246.195806    6896 tpu_compile_op_common.cc:245] Compilation of 5dc44faab3470f04:0:0 with session name  took 5.670723394s and succeeded
I0000 00:00:1718381246.244995    6896 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5dc44faab3470f04:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_508278_3388372006856078340", property.function_library_fingerprint = 15057106835495325392, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718381246.245164    6896 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/10


I0000 00:00:1718381504.386338    6871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b127e62538312c60:0:0), session_name()
I0000 00:00:1718381549.273202    6871 tpu_compile_op_common.cc:245] Compilation of b127e62538312c60:0:0 with session name  took 44.886791309s and succeeded
I0000 00:00:1718381549.476895    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b127e62538312c60:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_646843_1301163668595608876", property.function_library_fingerprint = 15982452062745862173, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718381589.606949    6905 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec59ed228f8d3a4f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_646843_1301163668595608876", property.function_library_fingerprint = 15982452062745862173, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718381589.607721    6905 tpu_compilation_cache_interface.cc:541] After adding entry for key ec59ed228f8d3a4f:0:0 with session_name  cache is 20 entries (4259452075 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718381613.6



I0000 00:00:1718381624.977286    6851 tpu_compile_op_common.cc:245] Compilation of f46126bc892b0aba:0:0 with session name  took 4.986244684s and succeeded
I0000 00:00:1718381625.011441    6851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f46126bc892b0aba:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_683892_1936748069260707907", property.function_library_fingerprint = 2927141812505267572, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718381625.011660    6851 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.6675601005554199 and accuracy: 0.832457959651947
Epoch 1/3


I0000 00:00:1718381749.192125    6911 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fe32a623efc845eb:0:0), session_name()
I0000 00:00:1718381819.141117    6911 tpu_compile_op_common.cc:245] Compilation of fe32a623efc845eb:0:0 with session name  took 1m9.94894254s and succeeded
I0000 00:00:1718381819.368927    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fe32a623efc845eb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_737179_8649367958031805795", property.function_library_fingerprint = 4188082220738921440, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 

Epoch 2/3
Epoch 3/3


2024-06-14 16:18:34.370602: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718381916.134461    6866 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(22947258afdc07ec:0:0), session_name()
I0000 00:00:1718381921.166662    6866 tpu_compile_op_common.cc:245] Compilation of 22947258afdc07ec:0:0 with session name  took 5.032156553s and succeeded
I0000 00:00:1718381921.191061    6866 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22947258afdc07ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15677702566128283970", property.function_library_fingerprint = 11379291588444225230, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718381932.914456    6844 tpu_compile_op_common.cc:245] Compilation of 903412b1baee2eb:0:0 with session name  took 5.806721864s and succeeded
I0000 00:00:1718381932.957447    6844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(903412b1baee2eb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_775300_15845255563371849531", property.function_library_fingerprint = 11379291588444225230, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718381932.957625    6844 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 1/12


I0000 00:00:1718382188.738655    6924 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f544d7fa32cc00b5:0:0), session_name()
I0000 00:00:1718382233.792467    6924 tpu_compile_op_common.cc:245] Compilation of f544d7fa32cc00b5:0:0 with session name  took 45.053758714s and succeeded
I0000 00:00:1718382234.028867    6924 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f544d7fa32cc00b5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_913865_14211587289524402279", property.function_library_fingerprint = 2075954882012241142, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718382276.799634    6843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9888c9a49f8f3925:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_913865_14211587289524402279", property.function_library_fingerprint = 2075954882012241142, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718382276.800580    6843 tpu_compilation_cache_interface.cc:541] After adding entry for key 9888c9a49f8f3925:0:0 with session_name  cache is 29 entries (6081904155 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718382302.6



I0000 00:00:1718382315.315468    6918 tpu_compile_op_common.cc:245] Compilation of 93e4e6b273bf5a8b:0:0 with session name  took 5.722235218s and succeeded
I0000 00:00:1718382315.361039    6918 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(93e4e6b273bf5a8b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_950914_12310936493706116383", property.function_library_fingerprint = 8210726707787961310, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718382315.361232    6918 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7672959566116333 and accuracy: 0.8440126180648804
Epoch 1/4


I0000 00:00:1718382444.901650    6876 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a01529332a704b1a:0:0), session_name()
I0000 00:00:1718382516.336637    6876 tpu_compile_op_common.cc:245] Compilation of a01529332a704b1a:0:0 with session name  took 1m11.434936367s and succeeded
I0000 00:00:1718382516.614490    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a01529332a704b1a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1007705_11799684098669760861", property.function_library_fingerprint = 6655337666994498023, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/4
Epoch 3/4
Epoch 4/4


2024-06-14 16:30:11.001579: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718382612.778049    6909 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cf6303ec427289d1:0:0), session_name()
I0000 00:00:1718382617.821090    6909 tpu_compile_op_common.cc:245] Compilation of cf6303ec427289d1:0:0 with session name  took 5.043003743s and succeeded
I0000 00:00:1718382617.846385    6909 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cf6303ec427289d1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_16395661485344860836", property.function_library_fingerprint = 11382897410451852895, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718382629.291996    6905 tpu_compile_op_common.cc:245] Compilation of 4b6d1f7d4b4a9e6:0:0 with session name  took 5.317596425s and succeeded
I0000 00:00:1718382629.335028    6905 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4b6d1f7d4b4a9e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1046440_6297746992355801069", property.function_library_fingerprint = 11382897410451852895, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718382629.335267    6905 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 1/9


I0000 00:00:1718382886.998792    6911 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1af680ee10931e47:0:0), session_name()
I0000 00:00:1718382929.650624    6911 tpu_compile_op_common.cc:245] Compilation of 1af680ee10931e47:0:0 with session name  took 42.651743748s and succeeded
I0000 00:00:1718382929.878594    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1af680ee10931e47:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1185147_16579981086528239223", property.function_library_fingerprint = 4057562985809570712, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718382993.104502    6929 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2c8b1c0a9bcee007:0:0), session_name()
I0000 00:00:1718382999.113805    6929 tpu_compile_op_common.cc:245] Compilation of 2c8b1c0a9bcee007:0:0 with session name  took 6.009250853s and succeeded
I0000 00:00:1718382999.169258    6929 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2c8b1c0a9bcee007:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1222238_1308947957173710253", property.function_library_fingerprint = 14363868687926288503, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718383004.974651    6908 tpu_compile_op_common.cc:245] Compilation of cba6830b31990622:0:0 with session name  took 5.705448709s and succeeded
I0000 00:00:1718383005.021103    6908 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cba6830b31990622:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1222238_1308947957173710253", property.function_library_fingerprint = 14363868687926288503, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718383005.021326    6908 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7825912833213806 and accuracy: 0.8445377945899963
Epoch 1/3


I0000 00:00:1718383129.806993    6879 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd238ab721ebccbf:0:0), session_name()
I0000 00:00:1718383202.631829    6879 tpu_compile_op_common.cc:245] Compilation of fd238ab721ebccbf:0:0 with session name  took 1m12.824795781s and succeeded
I0000 00:00:1718383202.914709    6879 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd238ab721ebccbf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1273811_4873308936801857572", property.function_library_fingerprint = 9939842280840005760, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/3
Epoch 3/3


2024-06-14 16:41:40.175611: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718383301.999020    6898 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eac441b0cce353f5:0:0), session_name()
I0000 00:00:1718383307.372268    6898 tpu_compile_op_common.cc:245] Compilation of eac441b0cce353f5:0:0 with session name  took 5.373204127s and succeeded
I0000 00:00:1718383307.395983    6898 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eac441b0cce353f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9368198206205960284", property.function_library_fingerprint = 15846450640364079877, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718383319.138870    6861 tpu_compile_op_common.cc:245] Compilation of 37cc5430e9641f2:0:0 with session name  took 5.526887268s and succeeded
I0000 00:00:1718383319.189252    6861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(37cc5430e9641f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1311974_14076983962402886099", property.function_library_fingerprint = 15846450640364079877, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718383319.189469    6861 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 1/15


I0000 00:00:1718383574.193741    6876 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e960069d0cc1ef0b:0:0), session_name()
I0000 00:00:1718383618.021635    6876 tpu_compile_op_common.cc:245] Compilation of e960069d0cc1ef0b:0:0 with session name  took 43.827837673s and succeeded
I0000 00:00:1718383618.247525    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e960069d0cc1ef0b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1450539_13144337540766086565", property.function_library_fingerprint = 4972428805350493828, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718383680.906086    6871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(77999ff004fa10ab:0:0), session_name()
I0000 00:00:1718383687.691455    6871 tpu_compile_op_common.cc:245] Compilation of 77999ff004fa10ab:0:0 with session name  took 6.785303338s and succeeded
I0000 00:00:1718383687.750434    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(77999ff004fa10ab:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1487588_6815908197394925859", property.function_library_fingerprint = 10649732426972213074, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718383693.635798    6883 tpu_compile_op_common.cc:245] Compilation of 6e462e2468a081b0:0:0 with session name  took 5.772209899s and succeeded
I0000 00:00:1718383693.677049    6883 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6e462e2468a081b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1487588_6815908197394925859", property.function_library_fingerprint = 10649732426972213074, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718383693.677277    6883 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
f1 score: 0.7993410229682922 and accuracy: 0.8282563090324402


[I 2024-06-14 16:49:01,173] Trial 41 finished with value: 0.8282563090324402 and parameters: {'num_epochs': 15, 'dropout_rate': 0.2834382487201909, 'weight_decay': 0.028090969001920253, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8407168124319919}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718383995.503689    6917 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8b9e35b6f1f7d069:0:0), session_name()
I0000 00:00:1718384040.381346    6917 tpu_compile_op_common.cc:245] Compilation of 8b9e35b6f1f7d069:0:0 with session name  took 44.877607823s and succeeded
I0000 00:00:1718384040.584769    6917 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b9e35b6f1f7d069:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1652623_2586964710665127131", property.function_library_fingerprint = 9173015838847021522, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718384082.989715    6923 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e0a6fe25ebc926bd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1652623_2586964710665127131", property.function_library_fingerprint = 9173015838847021522, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718384082.990289    6923 tpu_compilation_cache_interface.cc:541] After adding entry for key e0a6fe25ebc926bd:0:0 with session_name  cache is 51 entries (10481328207 bytes),  marked for eviction 10 entries (2153297103 bytes).
I0000 00:00:1



I0000 00:00:1718384119.046472    6853 tpu_compile_op_common.cc:245] Compilation of 63ec79089e4c7908:0:0 with session name  took 5.41295079s and succeeded
I0000 00:00:1718384119.086595    6853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(63ec79089e4c7908:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1689652_11360327534214441124", property.function_library_fingerprint = 847130533865985926, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718384119.086780    6853 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.747181236743927 and accuracy: 0.8303571343421936


[I 2024-06-14 16:55:39,878] Trial 49 finished with value: 0.8303571343421936 and parameters: {'num_epochs': 7, 'dropout_rate': 0.30740860855703006, 'weight_decay': 0.009930504071552373, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.6853427525184856}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718384393.597086    6861 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1fc30aeb8f55a90f:0:0), session_name()
I0000 00:00:1718384436.633945    6861 tpu_compile_op_common.cc:245] Compilation of 1fc30aeb8f55a90f:0:0 with session name  took 43.036802492s and succeeded
I0000 00:00:1718384436.812380    6861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1fc30aeb8f55a90f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1840727_8748162427231207242", property.function_library_fingerprint = 13870063211897267971, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718384499.655515    6902 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5c7283bcb6daa95f:0:0), session_name()
I0000 00:00:1718384505.897956    6902 tpu_compile_op_common.cc:245] Compilation of 5c7283bcb6daa95f:0:0 with session name  took 6.242399909s and succeeded
I0000 00:00:1718384505.946359    6902 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c7283bcb6daa95f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1877772_2251580232084992338", property.function_library_fingerprint = 7558263365010506921, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718384511.574748    6837 tpu_compile_op_common.cc:245] Compilation of dd5465df2d62ac2e:0:0 with session name  took 5.526698858s and succeeded
I0000 00:00:1718384511.625694    6837 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd5465df2d62ac2e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1877772_2251580232084992338", property.function_library_fingerprint = 7558263365010506921, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718384511.626011    6837 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7368096113204956 and accuracy: 0.8413865566253662
Epoch 1/2


I0000 00:00:1718384628.118367    6910 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5a9e9068014c509b:0:0), session_name()
I0000 00:00:1718384705.126745    6910 tpu_compile_op_common.cc:245] Compilation of 5a9e9068014c509b:0:0 with session name  took 1m17.008332948s and succeeded
I0000 00:00:1718384705.408900    6910 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5a9e9068014c509b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1925799_1247690586764953265", property.function_library_fingerprint = 15153755193478804978, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/2


2024-06-14 17:06:41.535274: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718384803.356946    6877 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b98607c89dd1ab98:0:0), session_name()
I0000 00:00:1718384808.872827    6877 tpu_compile_op_common.cc:245] Compilation of b98607c89dd1ab98:0:0 with session name  took 5.51575443s and succeeded
I0000 00:00:1718384808.899710    6877 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b98607c89dd1ab98:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_5591761567832042939", property.function_library_fingerprint = 17694348474713261922, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718384821.041462    6879 tpu_compile_op_common.cc:245] Compilation of 293430caad02a5cc:0:0 with session name  took 5.792202819s and succeeded
I0000 00:00:1718384821.096046    6879 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(293430caad02a5cc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1963302_17824743422678383795", property.function_library_fingerprint = 17694348474713261922, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718384821.096261    6879 tpu_compilation_cache_interface.cc:541] After adding entry f

File ./saved_models/disaster_FastTPU_0_model_trial_17_accuracy_0.8325_avg_score_0.7500_f1_0.6676_submission.csv has been removed.


[I 2024-06-14 17:07:02,150] Trial 57 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 7, 'dropout_rate': 0.3563657457787347, 'weight_decay': 0.09433340044912059, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.9080240307000218}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718385080.559822    6850 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(38aa03843c573ca5:0:0), session_name()
I0000 00:00:1718385126.629550    6850 tpu_compile_op_common.cc:245] Compilation of 38aa03843c573ca5:0:0 with session name  took 46.06965471s and succeeded
I0000 00:00:1718385126.846959    6850 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(38aa03843c573ca5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2102009_8179633160307314527", property.function_library_fingerprint = 238755506160939186, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718385189.792192    6843 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b447288b7d860d0f:0:0), session_name()
I0000 00:00:1718385195.791556    6843 tpu_compile_op_common.cc:245] Compilation of b447288b7d860d0f:0:0 with session name  took 5.99930121s and succeeded
I0000 00:00:1718385195.835671    6843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b447288b7d860d0f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2139100_9671720477950279644", property.function_library_fingerprint = 15497701880623210039, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718385200.972299    6862 tpu_compile_op_common.cc:245] Compilation of cba01dff16ab71c0:0:0 with session name  took 5.02933416s and succeeded
I0000 00:00:1718385201.010858    6862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cba01dff16ab71c0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2139100_9671720477950279644", property.function_library_fingerprint = 15497701880623210039, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718385201.011048    6862 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7509214282035828 and accuracy: 0.8298319578170776


[I 2024-06-14 17:13:35,660] Trial 64 finished with value: 0.8298319578170776 and parameters: {'num_epochs': 5, 'dropout_rate': 0.2432403387185163, 'weight_decay': 0.02229743131576803, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.10379645904450727}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718385473.763791    6871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(55bee4a4dd0622f7:0:0), session_name()
I0000 00:00:1718385515.720750    6871 tpu_compile_op_common.cc:245] Compilation of 55bee4a4dd0622f7:0:0 with session name  took 41.956781669s and succeeded
I0000 00:00:1718385515.901891    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(55bee4a4dd0622f7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2286829_11740017821882833697", property.function_library_fingerprint = 13449270170249765915, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718385555.219589    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(415dd7757153396c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2286829_11740017821882833697", property.function_library_fingerprint = 13449270170249765915, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718385555.220939    6857 tpu_compilation_cache_interface.cc:541] After adding entry for key 415dd7757153396c:0:0 with session_name  cache is 68 entries (13812953663 bytes),  marked for eviction 24 entries (5251790424 bytes).
I0000 00:00



I0000 00:00:1718385590.869788    6893 tpu_compile_op_common.cc:245] Compilation of 78f335e036ab5576:0:0 with session name  took 5.098988405s and succeeded
I0000 00:00:1718385590.909725    6893 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(78f335e036ab5576:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2323920_18116739575408292290", property.function_library_fingerprint = 12634540911324626097, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718385590.909966    6893 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7975108027458191 and accuracy: 0.838760495185852


[I 2024-06-14 17:20:31,610] Trial 66 finished with value: 0.838760495185852 and parameters: {'num_epochs': 12, 'dropout_rate': 0.20516290579000895, 'weight_decay': 0.02086783228239495, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.989809879322481}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718385885.270097    6912 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a2f54bab68d683a0:0:0), session_name()
I0000 00:00:1718385927.837688    6912 tpu_compile_op_common.cc:245] Compilation of a2f54bab68d683a0:0:0 with session name  took 42.567530834s and succeeded
I0000 00:00:1718385928.017603    6912 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a2f54bab68d683a0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2483913_3253971519186773408", property.function_library_fingerprint = 17931137174221286981, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718385994.876146    6886 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(95461222db92d028:0:0), session_name()
I0000 00:00:1718386001.110856    6886 tpu_compile_op_common.cc:245] Compilation of 95461222db92d028:0:0 with session name  took 6.234664494s and succeeded
I0000 00:00:1718386001.169882    6886 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(95461222db92d028:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2521004_5883480980447634351", property.function_library_fingerprint = 6274489816301532504, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718386006.501821    6876 tpu_compile_op_common.cc:245] Compilation of 4093890a348271e6:0:0 with session name  took 5.240353853s and succeeded
I0000 00:00:1718386006.545000    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4093890a348271e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2521004_5883480980447634351", property.function_library_fingerprint = 6274489816301532504, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718386006.545318    6876 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7351493835449219 and accuracy: 0.8434873819351196
Epoch 1/3


I0000 00:00:1718386124.323002    6860 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d26a1a17621250ea:0:0), session_name()
I0000 00:00:1718386196.971619    6860 tpu_compile_op_common.cc:245] Compilation of d26a1a17621250ea:0:0 with session name  took 1m12.648545599s and succeeded
I0000 00:00:1718386197.248356    6860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d26a1a17621250ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2570825_12736662854729037633", property.function_library_fingerprint = 1733910964831085712, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/3
Epoch 3/3


2024-06-14 17:31:29.607306: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718386291.459675    6901 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(63a774f5f588c2bf:0:0), session_name()
I0000 00:00:1718386296.982816    6901 tpu_compile_op_common.cc:245] Compilation of 63a774f5f588c2bf:0:0 with session name  took 5.52309177s and succeeded
I0000 00:00:1718386297.005613    6901 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(63a774f5f588c2bf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14025772985335388372", property.function_library_fingerprint = 3782642590907587078, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718386308.030707    6871 tpu_compile_op_common.cc:245] Compilation of bf4344dff4bf8f64:0:0 with session name  took 5.267455889s and succeeded
I0000 00:00:1718386308.081539    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf4344dff4bf8f64:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2608988_17822324704079595134", property.function_library_fingerprint = 3782642590907587078, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718386308.081927    6871 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_FastTPU_0_model_trial_1_accuracy_0.8403_avg_score_0.7746_f1_0.7089_submission.csv has been removed.


[I 2024-06-14 17:31:49,118] Trial 73 finished with value: 0.8434873819351196 and parameters: {'num_epochs': 8, 'dropout_rate': 0.347936910913754, 'weight_decay': 0.005401507866604968, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.5224132045389113}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/20


I0000 00:00:1718386564.882751    6910 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ee6c45fa26e00916:0:0), session_name()
I0000 00:00:1718386609.469731    6910 tpu_compile_op_common.cc:245] Compilation of ee6c45fa26e00916:0:0 with session name  took 44.586904385s and succeeded
I0000 00:00:1718386609.697048    6910 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ee6c45fa26e00916:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2747553_2392094938285746460", property.function_library_fingerprint = 5797093664852563449, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718386680.313089    6908 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1e12fcfdeafaf854:0:0), session_name()
I0000 00:00:1718386686.560235    6908 tpu_compile_op_common.cc:245] Compilation of 1e12fcfdeafaf854:0:0 with session name  took 6.247103346s and succeeded
I0000 00:00:1718386686.608096    6908 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1e12fcfdeafaf854:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2784602_11948548348654947151", property.function_library_fingerprint = 17756386093838565357, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718386692.169347    6878 tpu_compile_op_common.cc:245] Compilation of 1105c5c72b1db71f:0:0 with session name  took 5.458230932s and succeeded
I0000 00:00:1718386692.214357    6878 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1105c5c72b1db71f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2784602_11948548348654947151", property.function_library_fingerprint = 17756386093838565357, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718386692.214539    6878 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.8225236535072327 and accuracy: 0.8261554837226868


[I 2024-06-14 17:39:16,700] Trial 82 finished with value: 0.8261554837226868 and parameters: {'num_epochs': 20, 'dropout_rate': 0.2738929663382538, 'weight_decay': 0.013347413812222594, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3778852356318007}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/13


I0000 00:00:1718387025.290607    6842 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(620802a7442404ec:0:0), session_name()
I0000 00:00:1718387070.339209    6842 tpu_compile_op_common.cc:245] Compilation of 620802a7442404ec:0:0 with session name  took 45.048544677s and succeeded
I0000 00:00:1718387070.538900    6842 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(620802a7442404ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2958453_13740370644370865587", property.function_library_fingerprint = 16489918932996847613, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718387115.139256    6927 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e22d2b51d932e34:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2958453_13740370644370865587", property.function_library_fingerprint = 16489918932996847613, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718387115.140110    6927 tpu_compilation_cache_interface.cc:541] After adding entry for key 5e22d2b51d932e34:0:0 with session_name  cache is 85 entries (17144779635 bytes),  marked for eviction 42 entries (8897047820 bytes).
I0000 00:00



I0000 00:00:1718387155.131485    6927 tpu_compile_op_common.cc:245] Compilation of becdb0b5f4ae1127:0:0 with session name  took 6.14640423s and succeeded
I0000 00:00:1718387155.185181    6927 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(becdb0b5f4ae1127:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2995498_7105957728380374502", property.function_library_fingerprint = 1364215049610103480, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718387155.185523    6927 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7642877101898193 and accuracy: 0.8440126180648804
Epoch 1/4


I0000 00:00:1718387292.005048    6865 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c0e28535fd2e24f5:0:0), session_name()
I0000 00:00:1718387376.384443    6865 tpu_compile_op_common.cc:245] Compilation of c0e28535fd2e24f5:0:0 with session name  took 1m24.379347788s and succeeded
I0000 00:00:1718387376.630439    6865 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c0e28535fd2e24f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3054037_3781580207798122134", property.function_library_fingerprint = 9828824494262009779, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/4
Epoch 3/4
Epoch 4/4


2024-06-14 17:51:23.452648: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718387485.244629    6862 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5e54f56469e1d91:0:0), session_name()
I0000 00:00:1718387490.525389    6862 tpu_compile_op_common.cc:245] Compilation of 5e54f56469e1d91:0:0 with session name  took 5.280718875s and succeeded
I0000 00:00:1718387490.564461    6862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e54f56469e1d91:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_13377651767625653340", property.function_library_fingerprint = 10397858702078793840, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolo



I0000 00:00:1718387502.403355    6872 tpu_compile_op_common.cc:245] Compilation of f19bfea872c0bee8:0:0 with session name  took 5.951738906s and succeeded
I0000 00:00:1718387502.461015    6872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f19bfea872c0bee8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3092768_9276662847237083541", property.function_library_fingerprint = 10397858702078793840, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718387502.461554    6872 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_FastTPU_0_model_trial_57_accuracy_0.8414_avg_score_0.7891_f1_0.7368_submission.csv has been removed.


[I 2024-06-14 17:51:43,524] Trial 90 finished with value: 0.8440126180648804 and parameters: {'num_epochs': 13, 'dropout_rate': 0.32116500189359765, 'weight_decay': 0.02575324154051397, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.18885964691579069}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718387771.216768    6844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b18e37b5376d645d:0:0), session_name()
I0000 00:00:1718387817.749170    6844 tpu_compile_op_common.cc:245] Compilation of b18e37b5376d645d:0:0 with session name  took 46.532347972s and succeeded
I0000 00:00:1718387817.967447    6844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b18e37b5376d645d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3231333_16461048890574088881", property.function_library_fingerprint = 16225058795849323747, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718387886.200913    6843 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1fe44e1d02ec649:0:0), session_name()
I0000 00:00:1718387892.842100    6843 tpu_compile_op_common.cc:245] Compilation of 1fe44e1d02ec649:0:0 with session name  took 6.641119089s and succeeded
I0000 00:00:1718387892.900752    6843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1fe44e1d02ec649:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3268382_11341888939617168493", property.function_library_fingerprint = 2001337211349087101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size = 



I0000 00:00:1718387899.137218    6868 tpu_compile_op_common.cc:245] Compilation of e1c3db3b1703e53a:0:0 with session name  took 6.123039687s and succeeded
I0000 00:00:1718387899.183872    6868 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e1c3db3b1703e53a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3268382_11341888939617168493", property.function_library_fingerprint = 2001337211349087101, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718387899.184078    6868 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.6734618544578552 and accuracy: 0.8340336084365845


[I 2024-06-14 17:58:31,016] Trial 101 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 4, 'dropout_rate': 0.3741230271392998, 'weight_decay': 0.015794936861170752, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3107333814210731}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718388178.671247    6929 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(af7336d93768b423:0:0), session_name()
I0000 00:00:1718388223.517051    6929 tpu_compile_op_common.cc:245] Compilation of af7336d93768b423:0:0 with session name  took 44.845736348s and succeeded
I0000 00:00:1718388223.718297    6929 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af7336d93768b423:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3414359_125115668556877054", property.function_library_fingerprint = 5971528204403531868, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718388291.744220    6848 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(133ca16e68e8b92c:0:0), session_name()
I0000 00:00:1718388298.137594    6848 tpu_compile_op_common.cc:245] Compilation of 133ca16e68e8b92c:0:0 with session name  took 6.393316323s and succeeded
I0000 00:00:1718388298.180347    6848 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(133ca16e68e8b92c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3451450_1649830896069010598", property.function_library_fingerprint = 17646068768776784797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718388303.832411    6856 tpu_compile_op_common.cc:245] Compilation of 9e0ec70cfe9e6fb9:0:0 with session name  took 5.539980408s and succeeded
I0000 00:00:1718388303.881244    6856 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9e0ec70cfe9e6fb9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3451450_1649830896069010598", property.function_library_fingerprint = 17646068768776784797, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718388303.881581    6856 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7797735929489136 and accuracy: 0.8356092572212219


[I 2024-06-14 18:05:31,884] Trial 107 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22592513551386884, 'weight_decay': 0.010954861462246765, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.9517158694346808}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718388596.475434    6849 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aa68209067d5e0cc:0:0), session_name()
I0000 00:00:1718388641.362087    6849 tpu_compile_op_common.cc:245] Compilation of aa68209067d5e0cc:0:0 with session name  took 44.886598581s and succeeded
I0000 00:00:1718388641.591020    6849 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aa68209067d5e0cc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3606187_11002317214053572281", property.function_library_fingerprint = 2782777121798883425, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718388707.211782    6872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(86fc4742610c3b2c:0:0), session_name()
I0000 00:00:1718388713.482933    6872 tpu_compile_op_common.cc:245] Compilation of 86fc4742610c3b2c:0:0 with session name  took 6.271099928s and succeeded
I0000 00:00:1718388713.531141    6872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(86fc4742610c3b2c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3643278_16473002812572305973", property.function_library_fingerprint = 9920009794918586349, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718388718.946252    6860 tpu_compile_op_common.cc:245] Compilation of 4e34bbf1e7665057:0:0 with session name  took 5.301518371s and succeeded
I0000 00:00:1718388718.989667    6860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e34bbf1e7665057:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3643278_16473002812572305973", property.function_library_fingerprint = 9920009794918586349, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718388718.990054    6860 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7825561165809631 and accuracy: 0.8345588445663452


[I 2024-06-14 18:12:38,379] Trial 114 finished with value: 0.8345588445663452 and parameters: {'num_epochs': 12, 'dropout_rate': 0.2878580428050147, 'weight_decay': 0.008896360815731518, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.32711388585334134}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718389027.098205    6861 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7787f967ee364e37:0:0), session_name()
I0000 00:00:1718389069.776231    6861 tpu_compile_op_common.cc:245] Compilation of 7787f967ee364e37:0:0 with session name  took 42.677953418s and succeeded
I0000 00:00:1718389070.003060    6861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7787f967ee364e37:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3803271_14775261269698782630", property.function_library_fingerprint = 12944634655254035581, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718389111.200117    6929 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8363620a3ae5a613:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3803271_14775261269698782630", property.function_library_fingerprint = 12944634655254035581, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718389111.201936    6929 tpu_compilation_cache_interface.cc:541] After adding entry for key 8363620a3ae5a613:0:0 with session_name  cache is 106 entries (21231226291 bytes),  marked for eviction 63 entries (12774597070 bytes).
I0000 00:



I0000 00:00:1718389149.933530    6929 tpu_compile_op_common.cc:245] Compilation of c3945853639a0f20:0:0 with session name  took 5.766989481s and succeeded
I0000 00:00:1718389149.971775    6929 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3945853639a0f20:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3840362_2371149624683285472", property.function_library_fingerprint = 8930455133103488116, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718389149.972278    6929 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7853917479515076 and accuracy: 0.841911792755127


[I 2024-06-14 18:19:41,127] Trial 120 finished with value: 0.841911792755127 and parameters: {'num_epochs': 10, 'dropout_rate': 0.23260614790896456, 'weight_decay': 0.023692364081765425, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.4454773960978178}. Best is trial 23 with value: 0.8513655662536621.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718389449.221063    6851 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b72076102a3c2b0c:0:0), session_name()
I0000 00:00:1718389495.608241    6851 tpu_compile_op_common.cc:245] Compilation of b72076102a3c2b0c:0:0 with session name  took 46.387132394s and succeeded
I0000 00:00:1718389495.850195    6851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b72076102a3c2b0c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3996851_11017491736181544908", property.function_library_fingerprint = 2472428321388656070, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718389563.902974    6855 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c1007c4f322f7220:0:0), session_name()
I0000 00:00:1718389570.709296    6855 tpu_compile_op_common.cc:245] Compilation of c1007c4f322f7220:0:0 with session name  took 6.806262807s and succeeded
I0000 00:00:1718389570.759607    6855 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c1007c4f322f7220:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4033942_8144472784490632214", property.function_library_fingerprint = 14870365667480301648, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718389576.312370    6919 tpu_compile_op_common.cc:245] Compilation of 68c48fe197ee8a24:0:0 with session name  took 5.451958013s and succeeded
I0000 00:00:1718389576.362936    6919 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(68c48fe197ee8a24:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4033942_8144472784490632214", property.function_library_fingerprint = 14870365667480301648, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718389576.363138    6919 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.768798291683197 and accuracy: 0.8518907427787781
Epoch 1/3


I0000 00:00:1718389708.851214    6924 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e5caeadcbcab200a:0:0), session_name()
I0000 00:00:1718389786.642588    6924 tpu_compile_op_common.cc:245] Compilation of e5caeadcbcab200a:0:0 with session name  took 1m17.791260161s and succeeded
I0000 00:00:1718389786.890056    6924 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e5caeadcbcab200a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4087267_1292046876825876302", property.function_library_fingerprint = 7554099246263016358, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_si

Epoch 2/3
Epoch 3/3


2024-06-14 18:31:21.369485: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718389883.230931    6930 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c5cf82d390c5436c:0:0), session_name()
I0000 00:00:1718389888.636345    6930 tpu_compile_op_common.cc:245] Compilation of c5cf82d390c5436c:0:0 with session name  took 5.405372456s and succeeded
I0000 00:00:1718389888.670881    6930 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c5cf82d390c5436c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_15266877691654046417", property.function_library_fingerprint = 2011085230349213860, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718389901.879440    6874 tpu_compile_op_common.cc:245] Compilation of a7fa1ba2bfa120ad:0:0 with session name  took 6.179013541s and succeeded
I0000 00:00:1718389901.936655    6874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a7fa1ba2bfa120ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4125430_9985186557051306359", property.function_library_fingerprint = 2011085230349213860, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718389901.936964    6874 tpu_compilation_cache_interface.cc:541] After adding entry for

File ./saved_models/disaster_FastTPU_0_model_trial_73_accuracy_0.8435_avg_score_0.7893_f1_0.7351_submission.csv has been removed.


[I 2024-06-14 18:31:42,996] Trial 126 finished with value: 0.8518907427787781 and parameters: {'num_epochs': 10, 'dropout_rate': 0.29413096873509487, 'weight_decay': 0.020480998634656557, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.9258737149504944}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718390167.635083    6838 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(568fded58db68198:0:0), session_name()
I0000 00:00:1718390214.241362    6838 tpu_compile_op_common.cc:245] Compilation of 568fded58db68198:0:0 with session name  took 46.606221046s and succeeded
I0000 00:00:1718390214.487339    6838 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(568fded58db68198:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4263923_4255261631361247572", property.function_library_fingerprint = 2677727167966894874, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718390283.111511    6874 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(67c75b6456321348:0:0), session_name()
I0000 00:00:1718390289.858617    6874 tpu_compile_op_common.cc:245] Compilation of 67c75b6456321348:0:0 with session name  took 6.747053028s and succeeded
I0000 00:00:1718390289.909471    6874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(67c75b6456321348:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4300952_5133151034736620054", property.function_library_fingerprint = 15151633717908619882, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718390295.571583    6854 tpu_compile_op_common.cc:245] Compilation of 36c4d4c6180a9324:0:0 with session name  took 5.532233303s and succeeded
I0000 00:00:1718390295.610629    6854 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(36c4d4c6180a9324:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4300952_5133151034736620054", property.function_library_fingerprint = 15151633717908619882, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718390295.610851    6854 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7663522958755493 and accuracy: 0.8308823704719543


[I 2024-06-14 18:38:42,130] Trial 136 finished with value: 0.8308823704719543 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2932253827664887, 'weight_decay': 0.019356602801526557, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.5914919369050161}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2


I0000 00:00:1718390591.139952    6868 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f4a970fe19f92075:0:0), session_name()
I0000 00:00:1718390636.350431    6868 tpu_compile_op_common.cc:245] Compilation of f4a970fe19f92075:0:0 with session name  took 45.210428585s and succeeded
I0000 00:00:1718390636.569240    6868 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f4a970fe19f92075:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4453795_11082107170570322703", property.function_library_fingerprint = 2988861636923346088, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718390676.995203    6895 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec2209485f4d35b1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4453795_11082107170570322703", property.function_library_fingerprint = 2988861636923346088, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718390676.996532    6895 tpu_compilation_cache_interface.cc:541] After adding entry for key ec2209485f4d35b1:0:0 with session_name  cache is 123 entries (24562777873 bytes),  marked for eviction 80 entries (16106586217 bytes).
I0000 00:0



I0000 00:00:1718390714.952552    6908 tpu_compile_op_common.cc:245] Compilation of a68abeee8680d25:0:0 with session name  took 5.422537602s and succeeded
I0000 00:00:1718390714.988592    6908 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a68abeee8680d25:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4490844_1308871996333505141", property.function_library_fingerprint = 1808435485535438150, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718390714.988836    6908 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/2
f1 score: 0.6197041273117065 and accuracy: 0.805672287940979


[I 2024-06-14 18:45:22,400] Trial 142 finished with value: 0.805672287940979 and parameters: {'num_epochs': 2, 'dropout_rate': 0.33281577285575986, 'weight_decay': 0.013533547083470418, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8848596078090585}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718390989.476091    6867 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(32b04c2a5c201bdd:0:0), session_name()
I0000 00:00:1718391035.147063    6867 tpu_compile_op_common.cc:245] Compilation of 32b04c2a5c201bdd:0:0 with session name  took 45.67090017s and succeeded
I0000 00:00:1718391035.363303    6867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(32b04c2a5c201bdd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4633175_5091146121235479973", property.function_library_fingerprint = 18234608579178389679, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718391104.453002    6847 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(87ee610655c8cbb2:0:0), session_name()
I0000 00:00:1718391111.056487    6847 tpu_compile_op_common.cc:245] Compilation of 87ee610655c8cbb2:0:0 with session name  took 6.603381523s and succeeded
I0000 00:00:1718391111.104476    6847 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(87ee610655c8cbb2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4670224_6534713516218825921", property.function_library_fingerprint = 17973688927460004710, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718391116.858698    6853 tpu_compile_op_common.cc:245] Compilation of e0f794b545ad1f8f:0:0 with session name  took 5.627410719s and succeeded
I0000 00:00:1718391116.900704    6853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e0f794b545ad1f8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4670224_6534713516218825921", property.function_library_fingerprint = 17973688927460004710, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718391116.900935    6853 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7465002536773682 and accuracy: 0.8356092572212219


[I 2024-06-14 18:52:20,251] Trial 149 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 7, 'dropout_rate': 0.30639818247156086, 'weight_decay': 0.04573556708563457, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9773125024279248}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718391404.803109    6862 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(43a7794fd2007d30:0:0), session_name()
I0000 00:00:1718391449.887769    6862 tpu_compile_op_common.cc:245] Compilation of 43a7794fd2007d30:0:0 with session name  took 45.084595848s and succeeded
I0000 00:00:1718391450.127441    6862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(43a7794fd2007d30:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4821457_11321587926851858887", property.function_library_fingerprint = 4011615746535215929, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718391494.782555    6928 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(671af1116aed0f9a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4821457_11321587926851858887", property.function_library_fingerprint = 4011615746535215929, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718391494.783537    6928 tpu_compilation_cache_interface.cc:541] After adding entry for key 671af1116aed0f9a:0:0 with session_name  cache is 131 entries (26071864583 bytes),  marked for eviction 88 entries (17730678069 bytes).
I0000 00:0



I0000 00:00:1718391535.965404    6913 tpu_compile_op_common.cc:245] Compilation of 34fc5289029ca53c:0:0 with session name  took 6.417349415s and succeeded
I0000 00:00:1718391536.017113    6913 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(34fc5289029ca53c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4858548_7750062831934224875", property.function_library_fingerprint = 10965840148019433679, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718391536.017349    6913 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.770517110824585 and accuracy: 0.8272058963775635


[I 2024-06-14 18:59:20,745] Trial 156 finished with value: 0.8272058963775635 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2738946953588859, 'weight_decay': 0.05137269308869573, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.6621093902914815}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718391827.793225    6897 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3e56015bd4cf1548:0:0), session_name()
I0000 00:00:1718391872.368202    6897 tpu_compile_op_common.cc:245] Compilation of 3e56015bd4cf1548:0:0 with session name  took 44.574929463s and succeeded
I0000 00:00:1718391872.589533    6897 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e56015bd4cf1548:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5011391_5204923134114104667", property.function_library_fingerprint = 5397938308842496115, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718391941.643811    6845 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(637267128be09c71:0:0), session_name()
I0000 00:00:1718391948.057989    6845 tpu_compile_op_common.cc:245] Compilation of 637267128be09c71:0:0 with session name  took 6.414116876s and succeeded
I0000 00:00:1718391948.104963    6845 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(637267128be09c71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5048440_16078741043128041427", property.function_library_fingerprint = 13052956577871380407, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718391954.340496    6899 tpu_compile_op_common.cc:245] Compilation of b649720801a86d3b:0:0 with session name  took 6.108749233s and succeeded
I0000 00:00:1718391954.391599    6899 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b649720801a86d3b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5048440_16078741043128041427", property.function_library_fingerprint = 13052956577871380407, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718391954.391836    6899 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.723112165927887 and accuracy: 0.8424369692802429


[I 2024-06-14 19:06:12,514] Trial 161 finished with value: 0.8424369692802429 and parameters: {'num_epochs': 5, 'dropout_rate': 0.32197777690139784, 'weight_decay': 0.007000876483935374, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.519480046973207}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718392238.690936    6868 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6d35d048017e21d7:0:0), session_name()
I0000 00:00:1718392286.569936    6868 tpu_compile_op_common.cc:245] Compilation of 6d35d048017e21d7:0:0 with session name  took 47.878939704s and succeeded
I0000 00:00:1718392286.809836    6868 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6d35d048017e21d7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5196027_8937820706802257545", property.function_library_fingerprint = 9089431338485331374, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718392355.643584    6873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(71ec9d2c84053b37:0:0), session_name()
I0000 00:00:1718392362.005567    6873 tpu_compile_op_common.cc:245] Compilation of 71ec9d2c84053b37:0:0 with session name  took 6.361918195s and succeeded
I0000 00:00:1718392362.052623    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(71ec9d2c84053b37:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5233076_11235574632426827407", property.function_library_fingerprint = 10782838290580210613, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718392368.102582    6871 tpu_compile_op_common.cc:245] Compilation of 16b59f2f5d2779fe:0:0 with session name  took 5.945495863s and succeeded
I0000 00:00:1718392368.158906    6871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(16b59f2f5d2779fe:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5233076_11235574632426827407", property.function_library_fingerprint = 10782838290580210613, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718392368.159229    6871 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7392851114273071 and accuracy: 0.8424369692802429


[I 2024-06-14 19:13:08,170] Trial 168 finished with value: 0.8424369692802429 and parameters: {'num_epochs': 6, 'dropout_rate': 0.28545347783798275, 'weight_decay': 0.008246449124655124, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9396922463104322}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718392657.319311    6876 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(970cee2d97469ac1:0:0), session_name()
I0000 00:00:1718392702.058062    6876 tpu_compile_op_common.cc:245] Compilation of 970cee2d97469ac1:0:0 with session name  took 44.738677149s and succeeded
I0000 00:00:1718392702.303931    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(970cee2d97469ac1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5382415_10861266953328342472", property.function_library_fingerprint = 1515126481819726098, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718392770.209650    6873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f7334661353f99dc:0:0), session_name()
I0000 00:00:1718392777.902618    6873 tpu_compile_op_common.cc:245] Compilation of f7334661353f99dc:0:0 with session name  took 7.692909513s and succeeded
I0000 00:00:1718392777.963761    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f7334661353f99dc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5419464_13597472550204945528", property.function_library_fingerprint = 9667370855894302568, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718392784.375159    6908 tpu_compile_op_common.cc:245] Compilation of 259135cbda2cc67e:0:0 with session name  took 6.299817525s and succeeded
I0000 00:00:1718392784.424325    6908 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(259135cbda2cc67e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5419464_13597472550204945528", property.function_library_fingerprint = 9667370855894302568, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718392784.424554    6908 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7198166251182556 and accuracy: 0.8371848464012146


[I 2024-06-14 19:20:12,140] Trial 174 finished with value: 0.8371848464012146 and parameters: {'num_epochs': 9, 'dropout_rate': 0.4109850656439548, 'weight_decay': 0.021413636534871525, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.714672582898057}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718393075.917464    6890 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e06013af48a652e0:0:0), session_name()
I0000 00:00:1718393120.830053    6890 tpu_compile_op_common.cc:245] Compilation of e06013af48a652e0:0:0 with session name  took 44.912543647s and succeeded
I0000 00:00:1718393121.076956    6890 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e06013af48a652e0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5574201_6549578318312414269", property.function_library_fingerprint = 8564117154665879020, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718393189.767513    6918 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9cb8df43812a7b70:0:0), session_name()
I0000 00:00:1718393196.265614    6918 tpu_compile_op_common.cc:245] Compilation of 9cb8df43812a7b70:0:0 with session name  took 6.498060933s and succeeded
I0000 00:00:1718393196.326465    6918 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9cb8df43812a7b70:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5611292_4003850950071092260", property.function_library_fingerprint = 6843487082965856884, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718393201.799617    6897 tpu_compile_op_common.cc:245] Compilation of 7a3c5bb059e777e:0:0 with session name  took 5.342248542s and succeeded
I0000 00:00:1718393201.841808    6897 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7a3c5bb059e777e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5611292_4003850950071092260", property.function_library_fingerprint = 6843487082965856884, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718393201.842115    6897 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7676615118980408 and accuracy: 0.8377100825309753


[I 2024-06-14 19:27:03,402] Trial 183 finished with value: 0.8377100825309753 and parameters: {'num_epochs': 7, 'dropout_rate': 0.20299298653410527, 'weight_decay': 0.0020879530117146198, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.9992826758112858}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718393489.071866    6839 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8124fc3518385bb8:0:0), session_name()
I0000 00:00:1718393534.924212    6839 tpu_compile_op_common.cc:245] Compilation of 8124fc3518385bb8:0:0 with session name  took 45.852258054s and succeeded
I0000 00:00:1718393535.143747    6839 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8124fc3518385bb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5762383_8089417300538340189", property.function_library_fingerprint = 6150228509967035859, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718393606.143246    6856 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f9c5a3235996864d:0:0), session_name()
I0000 00:00:1718393612.558063    6856 tpu_compile_op_common.cc:245] Compilation of f9c5a3235996864d:0:0 with session name  took 6.414742752s and succeeded
I0000 00:00:1718393612.609011    6856 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f9c5a3235996864d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5799432_15177368064134588955", property.function_library_fingerprint = 2646457064178424139, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718393618.965764    6876 tpu_compile_op_common.cc:245] Compilation of 1106d6b7262fc3ac:0:0 with session name  took 6.211976815s and succeeded
I0000 00:00:1718393619.015941    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1106d6b7262fc3ac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5799432_15177368064134588955", property.function_library_fingerprint = 2646457064178424139, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718393619.016294    6876 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.737926721572876 and accuracy: 0.8450630307197571


I0000 00:00:1718393730.527345    6926 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(957530c4ba9e1265:0:0), session_name()
I0000 00:00:1718393808.640475    6926 tpu_compile_op_common.cc:245] Compilation of 957530c4ba9e1265:0:0 with session name  took 1m18.113081787s and succeeded
I0000 00:00:1718393808.935449    6926 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(957530c4ba9e1265:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5842207_8117367611213189647", property.function_library_fingerprint = 13227671248462607500, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s



2024-06-14 19:38:28.521351: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718393910.439521    6844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d4407d003ad1a51e:0:0), session_name()
I0000 00:00:1718393915.921756    6844 tpu_compile_op_common.cc:245] Compilation of d4407d003ad1a51e:0:0 with session name  took 5.482165695s and succeeded
I0000 00:00:1718393915.962978    6844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d4407d003ad1a51e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_17095663353749170474", property.function_library_fingerprint = 3792906090456329529, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718393928.562090    6840 tpu_compile_op_common.cc:245] Compilation of 6815d6744c36270e:0:0 with session name  took 5.842167678s and succeeded
I0000 00:00:1718393928.595418    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6815d6744c36270e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5879100_2119007302125131313", property.function_library_fingerprint = 3792906090456329529, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718393928.595662    6840 tpu_compilation_cache_interface.cc:541] After adding entry for

File ./saved_models/disaster_FastTPU_0_model_trial_90_accuracy_0.8440_avg_score_0.8042_f1_0.7643_submission.csv has been removed.


[I 2024-06-14 19:38:49,653] Trial 189 finished with value: 0.8450630307197571 and parameters: {'num_epochs': 4, 'dropout_rate': 0.14067575241689495, 'weight_decay': 0.006578064663144618, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7736567937989831}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718394197.947950    6853 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4238cb99abb19754:0:0), session_name()
I0000 00:00:1718394242.403878    6853 tpu_compile_op_common.cc:245] Compilation of 4238cb99abb19754:0:0 with session name  took 44.455878612s and succeeded
I0000 00:00:1718394242.600336    6853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4238cb99abb19754:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6017807_13884276447699343278", property.function_library_fingerprint = 11899821464787797262, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718394283.328776    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(53dcb37c1e895497:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6017807_13884276447699343278", property.function_library_fingerprint = 11899821464787797262, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718394283.330178    6873 tpu_compilation_cache_interface.cc:541] After adding entry for key 53dcb37c1e895497:0:0 with session_name  cache is 160 entries (31667141389 bytes),  marked for eviction 116 entries (23123624329 bytes).
I0000 00



I0000 00:00:1718394322.677333    6894 tpu_compile_op_common.cc:245] Compilation of c9997ef1a9d05857:0:0 with session name  took 5.546297693s and succeeded
I0000 00:00:1718394322.724718    6894 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c9997ef1a9d05857:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6054898_16175643805185471707", property.function_library_fingerprint = 13115636357794162816, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718394322.724979    6894 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7688478231430054 and accuracy: 0.8403361439704895


[I 2024-06-14 19:45:44,283] Trial 198 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 7, 'dropout_rate': 0.23143635873282983, 'weight_decay': 0.009382130981019327, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.06789698381742637}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718394613.475325    6894 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4b4ac237a7060a93:0:0), session_name()
I0000 00:00:1718394657.445173    6894 tpu_compile_op_common.cc:245] Compilation of 4b4ac237a7060a93:0:0 with session name  took 43.969798926s and succeeded
I0000 00:00:1718394657.666298    6894 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4b4ac237a7060a93:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6205989_172653239231605734", property.function_library_fingerprint = 9194407406062968372, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718394700.203161    6890 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50f3d3991c3ce4e0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6205989_172653239231605734", property.function_library_fingerprint = 9194407406062968372, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718394700.204588    6890 tpu_compilation_cache_interface.cc:541] After adding entry for key 50f3d3991c3ce4e0:0:0 with session_name  cache is 164 entries (32421619495 bytes),  marked for eviction 120 entries (23887908593 bytes).
I0000 00:00



I0000 00:00:1718394739.596786    6895 tpu_compile_op_common.cc:245] Compilation of b22560fe5bd3f9f6:0:0 with session name  took 5.57533475s and succeeded
I0000 00:00:1718394739.643283    6895 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b22560fe5bd3f9f6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6243038_3042028811676402088", property.function_library_fingerprint = 17802920678808115642, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718394739.643547    6895 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7718356847763062 and accuracy: 0.8513655662536621
Epoch 1/3


I0000 00:00:1718394870.036797    6859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(571a7764886f02cf:0:0), session_name()
I0000 00:00:1718394947.740807    6859 tpu_compile_op_common.cc:245] Compilation of 571a7764886f02cf:0:0 with session name  took 1m17.703940795s and succeeded
I0000 00:00:1718394947.981890    6859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(571a7764886f02cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6296325_7814811786247766316", property.function_library_fingerprint = 15603472401432534497, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/3
Epoch 3/3


2024-06-14 19:57:27.666654: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718395049.476397    6849 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b26a987bb6772953:0:0), session_name()
I0000 00:00:1718395055.281946    6849 tpu_compile_op_common.cc:245] Compilation of b26a987bb6772953:0:0 with session name  took 5.805476182s and succeeded
I0000 00:00:1718395055.331896    6849 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b26a987bb6772953:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8134608969567778815", property.function_library_fingerprint = 15816948338343233941, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718395068.153881    6901 tpu_compile_op_common.cc:245] Compilation of e3ebc4d5bf1fc143:0:0 with session name  took 5.876799433s and succeeded
I0000 00:00:1718395068.200983    6901 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e3ebc4d5bf1fc143:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6334446_13451444449532159718", property.function_library_fingerprint = 15816948338343233941, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718395068.201283    6901 tpu_compilation_cache_interface.cc:541] After adding entry f

File ./saved_models/disaster_FastTPU_0_model_trial_25_accuracy_0.8440_avg_score_0.8057_f1_0.7673_submission.csv has been removed.


[I 2024-06-14 19:57:49,262] Trial 206 finished with value: 0.8513655662536621 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2622675800701551, 'weight_decay': 0.01750822114238646, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8082957859444202}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718395337.260716    6905 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(df81a5ee7ddacb3e:0:0), session_name()
I0000 00:00:1718395383.543095    6905 tpu_compile_op_common.cc:245] Compilation of df81a5ee7ddacb3e:0:0 with session name  took 46.282307677s and succeeded
I0000 00:00:1718395383.744739    6905 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(df81a5ee7ddacb3e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6473011_12069257214827781735", property.function_library_fingerprint = 4996922182224668556, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718395426.983915    6928 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7f472574902b562f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6473011_12069257214827781735", property.function_library_fingerprint = 4996922182224668556, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718395426.985295    6928 tpu_compilation_cache_interface.cc:541] After adding entry for key 7f472574902b562f:0:0 with session_name  cache is 173 entries (34244249991 bytes),  marked for eviction 130 entries (25788065266 bytes).
I0000 00:



I0000 00:00:1718395468.538126    6840 tpu_compile_op_common.cc:245] Compilation of 61354c99079f8a72:0:0 with session name  took 6.6719285s and succeeded
I0000 00:00:1718395468.616788    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(61354c99079f8a72:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6510060_4199692712251195269", property.function_library_fingerprint = 16288839347646316859, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718395468.617383    6840 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7792548537254333 and accuracy: 0.8403361439704895


[I 2024-06-14 20:05:03,160] Trial 217 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2522274230353987, 'weight_decay': 0.018541715565357424, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.00898323710685986}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718395771.401049    6844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(57b9c8fc739348fb:0:0), session_name()
I0000 00:00:1718395814.675687    6844 tpu_compile_op_common.cc:245] Compilation of 57b9c8fc739348fb:0:0 with session name  took 43.274562971s and succeeded
I0000 00:00:1718395814.864479    6844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(57b9c8fc739348fb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6668143_8124828825260537978", property.function_library_fingerprint = 12736386989407457487, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718395885.572432    6840 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d2eae63e7469b267:0:0), session_name()
I0000 00:00:1718395892.696664    6840 tpu_compile_op_common.cc:245] Compilation of d2eae63e7469b267:0:0 with session name  took 7.122220086s and succeeded
I0000 00:00:1718395892.774045    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d2eae63e7469b267:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6705188_7067850752216602259", property.function_library_fingerprint = 8232149233702719632, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718395899.008425    6908 tpu_compile_op_common.cc:245] Compilation of 2a7736e110684c3e:0:0 with session name  took 6.119263478s and succeeded
I0000 00:00:1718395899.069279    6908 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a7736e110684c3e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6705188_7067850752216602259", property.function_library_fingerprint = 8232149233702719632, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718395899.069815    6908 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7669123411178589 and accuracy: 0.8356092572212219


[I 2024-06-14 20:12:07,095] Trial 225 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 9, 'dropout_rate': 0.22826657398512668, 'weight_decay': 0.057010557752901966, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.9251312907772097}. Best is trial 126 with value: 0.8518907427787781.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718396189.783927    6912 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bbe4004777dea2a5:0:0), session_name()
I0000 00:00:1718396236.653944    6912 tpu_compile_op_common.cc:245] Compilation of bbe4004777dea2a5:0:0 with session name  took 46.86995101s and succeeded
I0000 00:00:1718396236.883329    6912 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bbe4004777dea2a5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6859767_10063018598967036594", property.function_library_fingerprint = 12025834955877619320, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718396305.821762    6890 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d6ffb91d3f6ea8fd:0:0), session_name()
I0000 00:00:1718396312.755286    6890 tpu_compile_op_common.cc:245] Compilation of d6ffb91d3f6ea8fd:0:0 with session name  took 6.933460402s and succeeded
I0000 00:00:1718396312.805460    6890 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d6ffb91d3f6ea8fd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6896812_14875109551051398159", property.function_library_fingerprint = 16200215206610465629, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718396318.913704    6841 tpu_compile_op_common.cc:245] Compilation of 9985adcfff70c9e6:0:0 with session name  took 5.973717528s and succeeded
I0000 00:00:1718396318.961530    6841 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9985adcfff70c9e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6896812_14875109551051398159", property.function_library_fingerprint = 16200215206610465629, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718396318.961757    6841 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7867032885551453 and accuracy: 0.8408613204956055


[I 2024-06-14 20:19:03,509] Trial 231 finished with value: 0.8408613204956055 and parameters: {'num_epochs': 8, 'dropout_rate': 0.22350729739017994, 'weight_decay': 0.021452220670507754, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.5832304468100009}. Best is trial 221 with value: 0.8524159789085388.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718396609.568312    6856 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2cafd1c009ad5860:0:0), session_name()
I0000 00:00:1718396655.299868    6856 tpu_compile_op_common.cc:245] Compilation of 2cafd1c009ad5860:0:0 with session name  took 45.731483624s and succeeded
I0000 00:00:1718396655.505894    6856 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2cafd1c009ad5860:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7049639_2158961567209009524", property.function_library_fingerprint = 6598491131183597195, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718396701.171365    6910 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(46879f893d88dfd6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7049639_2158961567209009524", property.function_library_fingerprint = 6598491131183597195, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718396701.172869    6910 tpu_compilation_cache_interface.cc:541] After adding entry for key 46879f893d88dfd6:0:0 with session_name  cache is 185 entries (36507642369 bytes),  marked for eviction 142 entries (28051557145 bytes).
I0000 00:0



I0000 00:00:1718396739.700708    6902 tpu_compile_op_common.cc:245] Compilation of 67452bc6ea5b2ed8:0:0 with session name  took 5.721387465s and succeeded
I0000 00:00:1718396739.747254    6902 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(67452bc6ea5b2ed8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7086684_11132167669050564055", property.function_library_fingerprint = 5736495156912153069, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718396739.747455    6902 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7671725153923035 and accuracy: 0.825630247592926


[I 2024-06-14 20:26:16,538] Trial 237 finished with value: 0.825630247592926 and parameters: {'num_epochs': 11, 'dropout_rate': 0.279000228415014, 'weight_decay': 0.015135672384998224, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.8705547745810647}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718397045.474703    6888 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d936a6c6b432144f:0:0), session_name()
I0000 00:00:1718397092.443924    6888 tpu_compile_op_common.cc:245] Compilation of d936a6c6b432144f:0:0 with session name  took 46.969130702s and succeeded
I0000 00:00:1718397092.660858    6888 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d936a6c6b432144f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7244767_10141449570359291992", property.function_library_fingerprint = 16402686347332490684, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718397163.017382    6913 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5f03e07803932bfc:0:0), session_name()
I0000 00:00:1718397169.629970    6913 tpu_compile_op_common.cc:245] Compilation of 5f03e07803932bfc:0:0 with session name  took 6.61254544s and succeeded
I0000 00:00:1718397169.678868    6913 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5f03e07803932bfc:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7281812_10937758186218666309", property.function_library_fingerprint = 18160620309611962363, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718397175.824361    6851 tpu_compile_op_common.cc:245] Compilation of 181c9492217ba802:0:0 with session name  took 5.989872094s and succeeded
I0000 00:00:1718397175.875525    6851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(181c9492217ba802:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7281812_10937758186218666309", property.function_library_fingerprint = 18160620309611962363, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718397175.875787    6851 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7754583358764648 and accuracy: 0.8224790096282959


[I 2024-06-14 20:33:27,201] Trial 244 finished with value: 0.8224790096282959 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2923894791341282, 'weight_decay': 0.058218254119587566, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.4253594884080787}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718397478.662404    6875 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9dd559aed699be3:0:0), session_name()
I0000 00:00:1718397524.870729    6875 tpu_compile_op_common.cc:245] Compilation of 9dd559aed699be3:0:0 with session name  took 46.208275923s and succeeded
I0000 00:00:1718397525.070287    6875 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9dd559aed699be3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7438143_15230261125328997295", property.function_library_fingerprint = 10028125618486304571, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718397593.852124    6878 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9be47656b05f40c8:0:0), session_name()
I0000 00:00:1718397600.372099    6878 tpu_compile_op_common.cc:245] Compilation of 9be47656b05f40c8:0:0 with session name  took 6.51990648s and succeeded
I0000 00:00:1718397600.427532    6878 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9be47656b05f40c8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7475188_4874223662320297033", property.function_library_fingerprint = 5709263102913342819, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718397606.657320    6906 tpu_compile_op_common.cc:245] Compilation of b08af163e1e5a07d:0:0 with session name  took 6.106692053s and succeeded
I0000 00:00:1718397606.695949    6906 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b08af163e1e5a07d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7475188_4874223662320297033", property.function_library_fingerprint = 5709263102913342819, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718397606.696148    6906 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7733193635940552 and accuracy: 0.8272058963775635


[I 2024-06-14 20:40:46,247] Trial 252 finished with value: 0.8272058963775635 and parameters: {'num_epochs': 12, 'dropout_rate': 0.3084983522733499, 'weight_decay': 0.06171787769843184, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.9242891795866723}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718397912.638028    6859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d4d0decc9f5a5810:0:0), session_name()
I0000 00:00:1718397960.600066    6859 tpu_compile_op_common.cc:245] Compilation of d4d0decc9f5a5810:0:0 with session name  took 47.961980026s and succeeded
I0000 00:00:1718397960.823089    6859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d4d0decc9f5a5810:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7635039_9580972394567674277", property.function_library_fingerprint = 6534250659504544582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718398006.530241    6886 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(21df093330809f10:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7635039_9580972394567674277", property.function_library_fingerprint = 6534250659504544582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718398006.531353    6886 tpu_compilation_cache_interface.cc:541] After adding entry for key 21df093330809f10:0:0 with session_name  cache is 197 entries (38771048471 bytes),  marked for eviction 154 entries (30430282443 bytes).
I0000 00:0



I0000 00:00:1718398046.861945    6907 tpu_compile_op_common.cc:245] Compilation of 7c7c622854a9062:0:0 with session name  took 5.565826847s and succeeded
I0000 00:00:1718398046.898201    6907 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7c7c622854a9062:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7672088_4451765595917185418", property.function_library_fingerprint = 8260639528456908319, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718398046.898430    6907 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7928537130355835 and accuracy: 0.8392857313156128


[I 2024-06-14 20:48:01,250] Trial 260 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 11, 'dropout_rate': 0.25841996829801533, 'weight_decay': 0.011713517072510457, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8138902774385665}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718398350.802572    6866 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cb40e37375553abb:0:0), session_name()
I0000 00:00:1718398397.152861    6866 tpu_compile_op_common.cc:245] Compilation of cb40e37375553abb:0:0 with session name  took 46.350241814s and succeeded
I0000 00:00:1718398397.356636    6866 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cb40e37375553abb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7830115_14444937955442287182", property.function_library_fingerprint = 6684595022479997730, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718398441.555053    6920 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39cf65b44ac2da18:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7830115_14444937955442287182", property.function_library_fingerprint = 6684595022479997730, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718398441.555907    6920 tpu_compilation_cache_interface.cc:541] After adding entry for key 39cf65b44ac2da18:0:0 with session_name  cache is 201 entries (39525233055 bytes),  marked for eviction 157 entries (30982025651 bytes).
I0000 00:



I0000 00:00:1718398482.562474    6897 tpu_compile_op_common.cc:245] Compilation of 3900e1db58e426bb:0:0 with session name  took 5.944464898s and succeeded
I0000 00:00:1718398482.611910    6897 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3900e1db58e426bb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7867144_16506626008936853182", property.function_library_fingerprint = 13630222767189356112, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718398482.612216    6897 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7851380109786987 and accuracy: 0.8413865566253662


[I 2024-06-14 20:55:17,712] Trial 267 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 11, 'dropout_rate': 0.26100578655043544, 'weight_decay': 0.012655484456698675, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.8543861647255141}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718398780.486074    6876 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ff954aab984207b5:0:0), session_name()
I0000 00:00:1718398828.199540    6876 tpu_compile_op_common.cc:245] Compilation of ff954aab984207b5:0:0 with session name  took 47.713387677s and succeeded
I0000 00:00:1718398828.447424    6876 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ff954aab984207b5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8025227_11809805696680210624", property.function_library_fingerprint = 8512413356380923809, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718398896.368794    6885 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7d71de05fd7e3e71:0:0), session_name()
I0000 00:00:1718398903.657958    6885 tpu_compile_op_common.cc:245] Compilation of 7d71de05fd7e3e71:0:0 with session name  took 7.289088361s and succeeded
I0000 00:00:1718398903.713503    6885 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7d71de05fd7e3e71:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8062272_4160320794632297734", property.function_library_fingerprint = 280367237332769656, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size =



I0000 00:00:1718398909.872799    6912 tpu_compile_op_common.cc:245] Compilation of b56f4aa39ea9c9a8:0:0 with session name  took 6.043358266s and succeeded
I0000 00:00:1718398909.920621    6912 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b56f4aa39ea9c9a8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8062272_4160320794632297734", property.function_library_fingerprint = 280367237332769656, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718398909.920869    6912 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7797631025314331 and accuracy: 0.8403361439704895


[I 2024-06-14 21:02:19,479] Trial 275 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 9, 'dropout_rate': 0.23987833816314177, 'weight_decay': 0.01711149512664153, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.6348629529148334}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


I0000 00:00:1718399206.734027    6898 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(300d7e0d6bc6001b:0:0), session_name()
I0000 00:00:1718399254.781930    6898 tpu_compile_op_common.cc:245] Compilation of 300d7e0d6bc6001b:0:0 with session name  took 48.047835338s and succeeded
I0000 00:00:1718399255.001680    6898 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(300d7e0d6bc6001b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8216867_8497869337507573424", property.function_library_fingerprint = 6551464392206576159, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718399297.202486    6886 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a88c03cd33278d55:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8216867_8497869337507573424", property.function_library_fingerprint = 6551464392206576159, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718399297.204114    6886 tpu_compilation_cache_interface.cc:541] After adding entry for key a88c03cd33278d55:0:0 with session_name  cache is 209 entries (41034174679 bytes),  marked for eviction 165 entries (32501224849 bytes).
I0000 00:0



I0000 00:00:1718399336.673642    6841 tpu_compile_op_common.cc:245] Compilation of 3d5300448d821fd:0:0 with session name  took 6.440486193s and succeeded
I0000 00:00:1718399336.728546    6841 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3d5300448d821fd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8253916_11727574632057363456", property.function_library_fingerprint = 6300240899102826114, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718399336.728821    6841 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.7215338945388794 and accuracy: 0.8466386795043945
Epoch 1/2


I0000 00:00:1718399450.662667    6857 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(350401532797deca:0:0), session_name()
I0000 00:00:1718399535.757764    6857 tpu_compile_op_common.cc:245] Compilation of 350401532797deca:0:0 with session name  took 1m25.095013542s and succeeded
I0000 00:00:1718399536.008726    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(350401532797deca:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8298443_18204232840526192900", property.function_library_fingerprint = 5270584358981615934, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/2


2024-06-14 21:13:53.642371: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718399635.508401    6863 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8f814b0b55fdd4a0:0:0), session_name()
I0000 00:00:1718399641.806496    6863 tpu_compile_op_common.cc:245] Compilation of 8f814b0b55fdd4a0:0:0 with session name  took 6.298052331s and succeeded
I0000 00:00:1718399641.834290    6863 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8f814b0b55fdd4a0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11059135083945524762", property.function_library_fingerprint = 2214545623860618522, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718399654.583710    6857 tpu_compile_op_common.cc:245] Compilation of ade54db4ef7b8fe2:0:0 with session name  took 5.573179922s and succeeded
I0000 00:00:1718399654.625804    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ade54db4ef7b8fe2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8335950_15481275318180143257", property.function_library_fingerprint = 2214545623860618522, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718399654.626217    6857 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_FastTPU_0_model_trial_33_accuracy_0.8445_avg_score_0.8136_f1_0.7826_submission.csv has been removed.


[I 2024-06-14 21:14:15,693] Trial 282 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 5, 'dropout_rate': 0.3008992990075848, 'weight_decay': 0.004309042426715115, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.954322734995181}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718399919.893974    6917 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d5459e0ddfbb9b93:0:0), session_name()
I0000 00:00:1718399965.402290    6917 tpu_compile_op_common.cc:245] Compilation of d5459e0ddfbb9b93:0:0 with session name  took 45.508246261s and succeeded
I0000 00:00:1718399965.595165    6917 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d5459e0ddfbb9b93:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8474515_17897246949151416070", property.function_library_fingerprint = 467706581698843141, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718400010.411216    6893 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcac131764e95cae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8474515_17897246949151416070", property.function_library_fingerprint = 467706581698843141, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718400010.412301    6893 tpu_compilation_cache_interface.cc:541] After adding entry for key fcac131764e95cae:0:0 with session_name  cache is 218 entries (42856805863 bytes),  marked for eviction 174 entries (34323855345 bytes).
I0000 00:0



I0000 00:00:1718400051.774242    6884 tpu_compile_op_common.cc:245] Compilation of 1565bb1211f498c9:0:0 with session name  took 6.0244128s and succeeded
I0000 00:00:1718400051.825315    6884 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1565bb1211f498c9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8511564_6335574499655071954", property.function_library_fingerprint = 12616662799499804226, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718400051.825530    6884 tpu_compilation_cache_interface.cc:541] After adding entry 

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7659459710121155 and accuracy: 0.8440126180648804


[I 2024-06-14 21:21:10,100] Trial 293 finished with value: 0.8440126180648804 and parameters: {'num_epochs': 6, 'dropout_rate': 0.21899832092182495, 'weight_decay': 0.014852241532371024, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7314591879449706}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718400334.074900    6880 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8984d98153f94c1a:0:0), session_name()
I0000 00:00:1718400382.368200    6880 tpu_compile_op_common.cc:245] Compilation of 8984d98153f94c1a:0:0 with session name  took 48.293202708s and succeeded
I0000 00:00:1718400382.602307    6880 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8984d98153f94c1a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8660903_12132317691797100821", property.function_library_fingerprint = 10315889022730100181, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718400452.146930    6862 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(159bed3823d62e0e:0:0), session_name()
I0000 00:00:1718400458.761908    6862 tpu_compile_op_common.cc:245] Compilation of 159bed3823d62e0e:0:0 with session name  took 6.614912342s and succeeded
I0000 00:00:1718400458.815657    6862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(159bed3823d62e0e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8697952_3715598163007831712", property.function_library_fingerprint = 11107604730476276597, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718400464.925902    6896 tpu_compile_op_common.cc:245] Compilation of 32a1c538e8e19921:0:0 with session name  took 5.970592325s and succeeded
I0000 00:00:1718400464.970220    6896 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(32a1c538e8e19921:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8697952_3715598163007831712", property.function_library_fingerprint = 11107604730476276597, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718400464.970435    6896 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7647747993469238 and accuracy: 0.8172268867492676


[I 2024-06-14 21:28:14,619] Trial 301 finished with value: 0.8172268867492676 and parameters: {'num_epochs': 9, 'dropout_rate': 0.3151130207329537, 'weight_decay': 0.006319723221873442, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3920864182393583}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/11


I0000 00:00:1718400762.634144    6872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4a2202d35408d6af:0:0), session_name()
I0000 00:00:1718400806.765706    6872 tpu_compile_op_common.cc:245] Compilation of 4a2202d35408d6af:0:0 with session name  took 44.131473341s and succeeded
I0000 00:00:1718400806.955634    6872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4a2202d35408d6af:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8852547_5358799440552009656", property.function_library_fingerprint = 7622147702691511515, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718400874.860582    6851 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5431dee1eae86a19:0:0), session_name()
I0000 00:00:1718400881.601726    6851 tpu_compile_op_common.cc:245] Compilation of 5431dee1eae86a19:0:0 with session name  took 6.741070502s and succeeded
I0000 00:00:1718400881.669758    6851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5431dee1eae86a19:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8889596_11664332159147102049", property.function_library_fingerprint = 1835763631899620519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718400888.188727    6900 tpu_compile_op_common.cc:245] Compilation of 92a2173da373b0db:0:0 with session name  took 6.406109411s and succeeded
I0000 00:00:1718400888.234029    6900 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(92a2173da373b0db:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8889596_11664332159147102049", property.function_library_fingerprint = 1835763631899620519, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718400888.234238    6900 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.762863278388977 and accuracy: 0.8445377945899963


[I 2024-06-14 21:35:24,746] Trial 309 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 11, 'dropout_rate': 0.32508335567538243, 'weight_decay': 0.0078042805308429514, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9115355316233212}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718401193.883591    6837 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3a0dff983e5a5394:0:0), session_name()
I0000 00:00:1718401241.822558    6837 tpu_compile_op_common.cc:245] Compilation of 3a0dff983e5a5394:0:0 with session name  took 47.938907386s and succeeded
I0000 00:00:1718401242.062449    6837 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3a0dff983e5a5394:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9047837_5654475467149848648", property.function_library_fingerprint = 11559679692530869667, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718401287.518542    6904 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(385fbeb225ef2994:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9047837_5654475467149848648", property.function_library_fingerprint = 11559679692530869667, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718401287.519553    6904 tpu_compilation_cache_interface.cc:541] After adding entry for key 385fbeb225ef2994:0:0 with session_name  cache is 230 entries (45120376439 bytes),  marked for eviction 186 entries (36587247723 bytes).
I0000 00:



I0000 00:00:1718401327.888477    6866 tpu_compile_op_common.cc:245] Compilation of d0a3d7299247472e:0:0 with session name  took 5.354900199s and succeeded
I0000 00:00:1718401327.932434    6866 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d0a3d7299247472e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9084928_2130467577988050230", property.function_library_fingerprint = 10736722215421512177, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718401327.932699    6866 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7773666977882385 and accuracy: 0.8445377945899963


[I 2024-06-14 21:42:39,073] Trial 315 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2834221208530327, 'weight_decay': 0.009080668813473667, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.4956602239205067}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718401627.498866    6840 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7f22bf26cf194513:0:0), session_name()
I0000 00:00:1718401672.534628    6840 tpu_compile_op_common.cc:245] Compilation of 7f22bf26cf194513:0:0 with session name  took 45.035714214s and succeeded
I0000 00:00:1718401672.764445    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7f22bf26cf194513:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9241417_11448833832462663570", property.function_library_fingerprint = 9814714875551140811, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718401741.995378    6911 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39793b6b77ab683f:0:0), session_name()
I0000 00:00:1718401748.647467    6911 tpu_compile_op_common.cc:245] Compilation of 39793b6b77ab683f:0:0 with session name  took 6.652018293s and succeeded
I0000 00:00:1718401748.707853    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39793b6b77ab683f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9278508_8195886986045338365", property.function_library_fingerprint = 5019915281994408979, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718401754.914210    6844 tpu_compile_op_common.cc:245] Compilation of c18488fd6c349ebb:0:0 with session name  took 6.058081521s and succeeded
I0000 00:00:1718401754.964236    6844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c18488fd6c349ebb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9278508_8195886986045338365", property.function_library_fingerprint = 5019915281994408979, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718401754.964557    6844 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7846823930740356 and accuracy: 0.8356092572212219


[I 2024-06-14 21:49:49,193] Trial 322 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 10, 'dropout_rate': 0.26659707857636766, 'weight_decay': 0.017517174835445605, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.37187540464062174}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718402059.316365    6893 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39796391c0a02233:0:0), session_name()
I0000 00:00:1718402107.803514    6893 tpu_compile_op_common.cc:245] Compilation of 39796391c0a02233:0:0 with session name  took 48.487084297s and succeeded
I0000 00:00:1718402108.038242    6893 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39796391c0a02233:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9434855_16948896861257007610", property.function_library_fingerprint = 9231526141159725651, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718402177.280268    6872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(612793ad4102be9b:0:0), session_name()
I0000 00:00:1718402183.934530    6872 tpu_compile_op_common.cc:245] Compilation of 612793ad4102be9b:0:0 with session name  took 6.654206856s and succeeded
I0000 00:00:1718402183.980703    6872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(612793ad4102be9b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9471904_18308822622580991310", property.function_library_fingerprint = 4247811965172134422, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718402189.897008    6912 tpu_compile_op_common.cc:245] Compilation of 942741369d644374:0:0 with session name  took 5.809377556s and succeeded
I0000 00:00:1718402189.937278    6912 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(942741369d644374:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9471904_18308822622580991310", property.function_library_fingerprint = 4247811965172134422, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718402189.937665    6912 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.777734637260437 and accuracy: 0.8466386795043945
Epoch 1/3


I0000 00:00:1718402315.731166    6839 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39c7f435bbfc964:0:0), session_name()
I0000 00:00:1718402394.179651    6839 tpu_compile_op_common.cc:245] Compilation of 39c7f435bbfc964:0:0 with session name  took 1m18.448366724s and succeeded
I0000 00:00:1718402394.414627    6839 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39c7f435bbfc964:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9523439_15664805658391708970", property.function_library_fingerprint = 17895907318952806420, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz

Epoch 2/3
Epoch 3/3


2024-06-14 22:01:38.236287: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718402500.228794    6860 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b0a96b7d71eca187:0:0), session_name()
I0000 00:00:1718402505.840684    6860 tpu_compile_op_common.cc:245] Compilation of b0a96b7d71eca187:0:0 with session name  took 5.61180561s and succeeded
I0000 00:00:1718402505.885270    6860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b0a96b7d71eca187:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14542845233752077724", property.function_library_fingerprint = 3994275154464414452, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718402519.096389    6846 tpu_compile_op_common.cc:245] Compilation of 7ebf5e81ca60b16a:0:0 with session name  took 6.138607028s and succeeded
I0000 00:00:1718402519.148179    6846 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7ebf5e81ca60b16a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9561560_2918297487598440858", property.function_library_fingerprint = 3994275154464414452, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718402519.148376    6846 tpu_compilation_cache_interface.cc:541] After adding entry for

File ./saved_models/disaster_FastTPU_0_model_trial_189_accuracy_0.8451_avg_score_0.7915_f1_0.7379_submission.csv has been removed.


[I 2024-06-14 22:02:00,188] Trial 326 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 9, 'dropout_rate': 0.23503591402266732, 'weight_decay': 0.016364588825482286, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.23039331911822933}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718402789.860477    6874 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9f2e8b383b304b7a:0:0), session_name()
I0000 00:00:1718402834.853610    6874 tpu_compile_op_common.cc:245] Compilation of 9f2e8b383b304b7a:0:0 with session name  took 44.993070354s and succeeded
I0000 00:00:1718402835.088119    6874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9f2e8b383b304b7a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9700125_13511684048235114933", property.function_library_fingerprint = 6820162452914938675, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718402876.865648    6894 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8beacdf6837764e9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9700125_13511684048235114933", property.function_library_fingerprint = 6820162452914938675, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718402876.867159    6894 tpu_compilation_cache_interface.cc:541] After adding entry for key 8beacdf6837764e9:0:0 with session_name  cache is 247 entries (48452100781 bytes),  marked for eviction 204 entries (39995971047 bytes).
I0000 00:



I0000 00:00:1718402915.855442    6930 tpu_compile_op_common.cc:245] Compilation of 1613bd4154fd446f:0:0 with session name  took 6.391057706s and succeeded
I0000 00:00:1718402915.899081    6930 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1613bd4154fd446f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9737174_10701403455965179147", property.function_library_fingerprint = 5596372110055427447, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718402915.899318    6930 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7772911787033081 and accuracy: 0.8287814855575562


[I 2024-06-14 22:09:08,982] Trial 340 finished with value: 0.8287814855575562 and parameters: {'num_epochs': 10, 'dropout_rate': 0.258585808005126, 'weight_decay': 0.06005787206242302, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.31889559536960355}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718403215.194242    6837 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5cefbd260916c1f5:0:0), session_name()
I0000 00:00:1718403261.224550    6837 tpu_compile_op_common.cc:245] Compilation of 5cefbd260916c1f5:0:0 with session name  took 46.030245768s and succeeded
I0000 00:00:1718403261.438050    6837 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5cefbd260916c1f5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9893521_11064252147257331514", property.function_library_fingerprint = 12809403960517807718, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718403329.158457    6923 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f74168a83124f00e:0:0), session_name()
I0000 00:00:1718403336.723255    6923 tpu_compile_op_common.cc:245] Compilation of f74168a83124f00e:0:0 with session name  took 7.564747047s and succeeded
I0000 00:00:1718403336.789646    6923 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f74168a83124f00e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9930570_4241435852208374510", property.function_library_fingerprint = 3070252081119160109, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718403342.901081    6842 tpu_compile_op_common.cc:245] Compilation of 10aacbe3a8385ff:0:0 with session name  took 5.997025558s and succeeded
I0000 00:00:1718403342.952119    6842 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(10aacbe3a8385ff:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9930570_4241435852208374510", property.function_library_fingerprint = 3070252081119160109, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718403342.952469    6842 tpu_compilation_cache_interface.cc:541] After adding entry f

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7300986051559448 and accuracy: 0.8503151535987854
Epoch 1/3


I0000 00:00:1718403467.588397    6853 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(da662e34ab573a91:0:0), session_name()
I0000 00:00:1718403552.467522    6853 tpu_compile_op_common.cc:245] Compilation of da662e34ab573a91:0:0 with session name  took 1m24.879052869s and succeeded
I0000 00:00:1718403552.751181    6853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(da662e34ab573a91:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9980353_12639654243147289582", property.function_library_fingerprint = 5777254204976625705, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_s

Epoch 2/3
Epoch 3/3


2024-06-14 22:20:51.232455: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718403653.023460    6921 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3479c23ce9717568:0:0), session_name()
I0000 00:00:1718403658.891857    6921 tpu_compile_op_common.cc:245] Compilation of 3479c23ce9717568:0:0 with session name  took 5.868356591s and succeeded
I0000 00:00:1718403658.921919    6921 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3479c23ce9717568:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14966275891142878139", property.function_library_fingerprint = 8052173238455885970, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718403672.164508    6911 tpu_compile_op_common.cc:245] Compilation of 3d59bb79bd67f61c:0:0 with session name  took 6.135709347s and succeeded
I0000 00:00:1718403672.224109    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3d59bb79bd67f61c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10018474_11612985162065111879", property.function_library_fingerprint = 8052173238455885970, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718403672.224166    6911 tpu_compilation_cache_interface.cc:541] After adding entry f

File ./saved_models/disaster_FastTPU_0_model_trial_326_accuracy_0.8466_avg_score_0.8122_f1_0.7777_submission.csv has been removed.


[I 2024-06-14 22:21:13,280] Trial 348 finished with value: 0.8503151535987854 and parameters: {'num_epochs': 8, 'dropout_rate': 0.34743404987722143, 'weight_decay': 0.01885721537415556, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8761949708384978}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718403943.723219    6842 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(19e5159cb68252d:0:0), session_name()
I0000 00:00:1718403990.396523    6842 tpu_compile_op_common.cc:245] Compilation of 19e5159cb68252d:0:0 with session name  took 46.673245309s and succeeded
I0000 00:00:1718403990.615026    6842 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(19e5159cb68252d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10157039_1625103267562707408", property.function_library_fingerprint = 15712334807059614054, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size 



I0000 00:00:1718404035.429747    6913 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2cde730f2b0ec841:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10157039_1625103267562707408", property.function_library_fingerprint = 15712334807059614054, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718404035.431290    6913 tpu_compilation_cache_interface.cc:541] After adding entry for key 2cde730f2b0ec841:0:0 with session_name  cache is 260 entries (51029209207 bytes),  marked for eviction 217 entries (42573072379 bytes).
I0000 00



I0000 00:00:1718404074.467553    6850 tpu_compile_op_common.cc:245] Compilation of 4aded895e0c37436:0:0 with session name  took 6.304191531s and succeeded
I0000 00:00:1718404074.520814    6850 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4aded895e0c37436:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10194088_14900593073462925142", property.function_library_fingerprint = 6594396385498660568, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718404074.521089    6850 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7497818470001221 and accuracy: 0.8371848464012146


[I 2024-06-14 22:28:21,162] Trial 359 finished with value: 0.8371848464012146 and parameters: {'num_epochs': 8, 'dropout_rate': 0.34569228481108233, 'weight_decay': 0.07284792562799838, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8776002689071635}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


I0000 00:00:1718404370.876103    6853 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(30f478820216bf63:0:0), session_name()
I0000 00:00:1718404416.950866    6853 tpu_compile_op_common.cc:245] Compilation of 30f478820216bf63:0:0 with session name  took 46.074675599s and succeeded
I0000 00:00:1718404417.178526    6853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(30f478820216bf63:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10346931_2821243999260809972", property.function_library_fingerprint = 6608747402606166991, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718404462.029960    6867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eb0bc2132f016fac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10346931_2821243999260809972", property.function_library_fingerprint = 6608747402606166991, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718404462.032499    6867 tpu_compilation_cache_interface.cc:541] After adding entry for key eb0bc2132f016fac:0:0 with session_name  cache is 264 entries (51783687089 bytes),  marked for eviction 221 entries (43327550853 bytes).
I0000 00:



I0000 00:00:1718404502.187168    6873 tpu_compile_op_common.cc:245] Compilation of a69ab2d2203b547f:0:0 with session name  took 5.479256294s and succeeded
I0000 00:00:1718404502.236106    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a69ab2d2203b547f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10383980_4428006777407544939", property.function_library_fingerprint = 668866890931066129, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718404502.236453    6873 tpu_compilation_cache_interface.cc:541] After adding entry

Epoch 2/3
Epoch 3/3
f1 score: 0.7166883945465088 and accuracy: 0.831932783126831


[I 2024-06-14 22:35:11,040] Trial 366 finished with value: 0.831932783126831 and parameters: {'num_epochs': 3, 'dropout_rate': 0.25114959174643825, 'weight_decay': 0.019043552211800242, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4695371658269374}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/16


I0000 00:00:1718404779.794684    6885 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(35555cc323ea0e01:0:0), session_name()
I0000 00:00:1718404826.397412    6885 tpu_compile_op_common.cc:245] Compilation of 35555cc323ea0e01:0:0 with session name  took 46.602651353s and succeeded
I0000 00:00:1718404826.637486    6885 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(35555cc323ea0e01:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10528063_16127896816407285560", property.function_library_fingerprint = 11424384984841894886, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_s



I0000 00:00:1718404893.463748    6857 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bf9820de02e1c9f8:0:0), session_name()
I0000 00:00:1718404899.775925    6857 tpu_compile_op_common.cc:245] Compilation of bf9820de02e1c9f8:0:0 with session name  took 6.312101246s and succeeded
I0000 00:00:1718404899.825572    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf9820de02e1c9f8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10565112_14208329675454505216", property.function_library_fingerprint = 8637258879885147912, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718404905.883260    6867 tpu_compile_op_common.cc:245] Compilation of bbce85581eecefa5:0:0 with session name  took 5.935236887s and succeeded
I0000 00:00:1718404905.928284    6867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bbce85581eecefa5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10565112_14208329675454505216", property.function_library_fingerprint = 8637258879885147912, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718404905.928812    6867 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
f1 score: 0.7693135738372803 and accuracy: 0.8261554837226868


[I 2024-06-14 22:42:37,936] Trial 373 finished with value: 0.8261554837226868 and parameters: {'num_epochs': 16, 'dropout_rate': 0.3614048642956049, 'weight_decay': 0.016168205006294994, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9432421013506055}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/7


I0000 00:00:1718405226.515435    6875 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c0435e7451fb30be:0:0), session_name()
I0000 00:00:1718405271.996560    6875 tpu_compile_op_common.cc:245] Compilation of c0435e7451fb30be:0:0 with session name  took 45.481066718s and succeeded
I0000 00:00:1718405272.228340    6875 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c0435e7451fb30be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10731971_16935818399596545808", property.function_library_fingerprint = 16985574207638911021, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_s



I0000 00:00:1718405339.862687    6860 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8d10f8aff657e166:0:0), session_name()
I0000 00:00:1718405346.435530    6860 tpu_compile_op_common.cc:245] Compilation of 8d10f8aff657e166:0:0 with session name  took 6.572769136s and succeeded
I0000 00:00:1718405346.482949    6860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8d10f8aff657e166:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10769020_1637017028966186154", property.function_library_fingerprint = 11558445690105426446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718405352.770044    6913 tpu_compile_op_common.cc:245] Compilation of b9f8c7e0f6de70af:0:0 with session name  took 6.171913925s and succeeded
I0000 00:00:1718405352.812757    6913 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b9f8c7e0f6de70af:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10769020_1637017028966186154", property.function_library_fingerprint = 11558445690105426446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718405352.813086    6913 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7605931162834167 and accuracy: 0.8482142686843872
Epoch 1/2


I0000 00:00:1718405475.710817    6920 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(616b4c03b243cebe:0:0), session_name()
I0000 00:00:1718405553.051043    6920 tpu_compile_op_common.cc:245] Compilation of 616b4c03b243cebe:0:0 with session name  took 1m17.340134073s and succeeded
I0000 00:00:1718405553.299228    6920 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(616b4c03b243cebe:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10817051_13673830491444395795", property.function_library_fingerprint = 14956401605561452309, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants

Epoch 2/2


2024-06-14 22:54:13.353405: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718405655.534036    6852 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b1f46474d8a8efa5:0:0), session_name()
I0000 00:00:1718405660.982249    6852 tpu_compile_op_common.cc:245] Compilation of b1f46474d8a8efa5:0:0 with session name  took 5.448137239s and succeeded
I0000 00:00:1718405661.030298    6852 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b1f46474d8a8efa5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14353400504053684627", property.function_library_fingerprint = 9948710162473103817, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718405673.613671    6857 tpu_compile_op_common.cc:245] Compilation of 6098d04d8700b615:0:0 with session name  took 5.844013764s and succeeded
I0000 00:00:1718405673.662532    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6098d04d8700b615:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10854558_5922740927309753404", property.function_library_fingerprint = 9948710162473103817, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718405673.662876    6857 tpu_compilation_cache_interface.cc:541] After adding entry fo

File ./saved_models/disaster_FastTPU_0_model_trial_282_accuracy_0.8466_avg_score_0.7841_f1_0.7215_submission.csv has been removed.


[I 2024-06-14 22:54:34,732] Trial 380 finished with value: 0.8482142686843872 and parameters: {'num_epochs': 7, 'dropout_rate': 0.2640326769190182, 'weight_decay': 0.015014349257546406, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8521193093358248}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718405944.570436    6867 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e14287c86c519d68:0:0), session_name()
I0000 00:00:1718405989.190006    6867 tpu_compile_op_common.cc:245] Compilation of e14287c86c519d68:0:0 with session name  took 44.619484644s and succeeded
I0000 00:00:1718405989.385317    6867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e14287c86c519d68:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10993123_8227562043747245053", property.function_library_fingerprint = 8624280097184862893, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718406058.931358    6927 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2c78bfc38b221bf0:0:0), session_name()
I0000 00:00:1718406065.580835    6927 tpu_compile_op_common.cc:245] Compilation of 2c78bfc38b221bf0:0:0 with session name  took 6.649421428s and succeeded
I0000 00:00:1718406065.633053    6927 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2c78bfc38b221bf0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11030172_13928732721127426778", property.function_library_fingerprint = 5057721057068822080, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718406071.678454    6893 tpu_compile_op_common.cc:245] Compilation of 3395360bb61cdf45:0:0 with session name  took 5.901625777s and succeeded
I0000 00:00:1718406071.723902    6893 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3395360bb61cdf45:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11030172_13928732721127426778", property.function_library_fingerprint = 5057721057068822080, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718406071.724551    6893 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7847919464111328 and accuracy: 0.8277310729026794


[I 2024-06-14 23:01:45,131] Trial 394 finished with value: 0.8277310729026794 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2777231092517841, 'weight_decay': 0.0026898503073759517, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9942167118368335}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718406372.780820    6914 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(864549ab371728b0:0:0), session_name()
I0000 00:00:1718406420.153589    6914 tpu_compile_op_common.cc:245] Compilation of 864549ab371728b0:0:0 with session name  took 47.372692671s and succeeded
I0000 00:00:1718406420.393296    6914 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(864549ab371728b0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11186519_13683922570888751460", property.function_library_fingerprint = 4503572259679802009, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718406493.392325    6912 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7544ed22f68200ae:0:0), session_name()
I0000 00:00:1718406500.593806    6912 tpu_compile_op_common.cc:245] Compilation of 7544ed22f68200ae:0:0 with session name  took 7.201421007s and succeeded
I0000 00:00:1718406500.651294    6912 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7544ed22f68200ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11223568_8413454971785047968", property.function_library_fingerprint = 11377343774109844617, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718406506.915831    6874 tpu_compile_op_common.cc:245] Compilation of d7071bbb8a87127c:0:0 with session name  took 6.114810044s and succeeded
I0000 00:00:1718406506.962768    6874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d7071bbb8a87127c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11223568_8413454971785047968", property.function_library_fingerprint = 11377343774109844617, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718406506.963052    6874 tpu_compilation_cache_interface.cc:541] After adding ent

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7761275172233582 and accuracy: 0.8303571343421936


[I 2024-06-14 23:08:55,285] Trial 402 finished with value: 0.8303571343421936 and parameters: {'num_epochs': 9, 'dropout_rate': 0.27830058148672715, 'weight_decay': 0.009082111185917015, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3011489971129767}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/6


I0000 00:00:1718406802.256745    6857 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8b86b51d61a0605d:0:0), session_name()
I0000 00:00:1718406848.218495    6857 tpu_compile_op_common.cc:245] Compilation of 8b86b51d61a0605d:0:0 with session name  took 45.961670613s and succeeded
I0000 00:00:1718406848.458950    6857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8b86b51d61a0605d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11378147_5287222310962748006", property.function_library_fingerprint = 4694203089910569622, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718406895.887744    6931 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8061011b96047ef2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11378147_5287222310962748006", property.function_library_fingerprint = 4694203089910569622, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718406895.889045    6931 tpu_compilation_cache_interface.cc:541] After adding entry for key 8061011b96047ef2:0:0 with session_name  cache is 289 entries (56624216349 bytes),  marked for eviction 246 entries (48168367937 bytes).
I0000 00:



I0000 00:00:1718406936.215711    6877 tpu_compile_op_common.cc:245] Compilation of 2b85f5d0eaf7f8d5:0:0 with session name  took 6.103151651s and succeeded
I0000 00:00:1718406936.265454    6877 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2b85f5d0eaf7f8d5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11415192_12961840322463905095", property.function_library_fingerprint = 16406971268823117381, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718406936.265713    6877 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7496414184570312 and accuracy: 0.8240545988082886


[I 2024-06-14 23:15:56,623] Trial 410 finished with value: 0.8240545988082886 and parameters: {'num_epochs': 6, 'dropout_rate': 0.29390651862629885, 'weight_decay': 0.02227131846439511, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.8047009606473172}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


I0000 00:00:1718407225.649898    6921 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eafe363d3bc9cd80:0:0), session_name()
I0000 00:00:1718407272.350728    6921 tpu_compile_op_common.cc:245] Compilation of eafe363d3bc9cd80:0:0 with session name  took 46.700775506s and succeeded
I0000 00:00:1718407272.607254    6921 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eafe363d3bc9cd80:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11564531_15764889262078587659", property.function_library_fingerprint = 6406009967909887373, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718407314.803088    6906 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ba16405acf0d97a6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11564531_15764889262078587659", property.function_library_fingerprint = 6406009967909887373, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718407314.804269    6906 tpu_compilation_cache_interface.cc:541] After adding entry for key ba16405acf0d97a6:0:0 with session_name  cache is 293 entries (57378694231 bytes),  marked for eviction 250 entries (48922845771 bytes).
I0000 00



I0000 00:00:1718407356.864587    6896 tpu_compile_op_common.cc:245] Compilation of 2fd7d336fea550e3:0:0 with session name  took 6.198859967s and succeeded
I0000 00:00:1718407356.919126    6896 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2fd7d336fea550e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11601580_2402943177654634049", property.function_library_fingerprint = 8737752365451281353, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718407356.919321    6896 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.7311558723449707 and accuracy: 0.8366596698760986


[I 2024-06-14 23:22:49,157] Trial 418 finished with value: 0.8366596698760986 and parameters: {'num_epochs': 4, 'dropout_rate': 0.25808448359069225, 'weight_decay': 0.049737097620652704, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8314901548992597}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718407636.815751    6873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dd298859bf977468:0:0), session_name()
I0000 00:00:1718407683.268146    6873 tpu_compile_op_common.cc:245] Compilation of dd298859bf977468:0:0 with session name  took 46.452317216s and succeeded
I0000 00:00:1718407683.505236    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dd298859bf977468:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11747415_12424167614576772634", property.function_library_fingerprint = 3416026991642359575, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_si



I0000 00:00:1718407726.818525    6873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a81515d28775c48a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11747415_12424167614576772634", property.function_library_fingerprint = 3416026991642359575, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718407726.820825    6873 tpu_compilation_cache_interface.cc:541] After adding entry for key a81515d28775c48a:0:0 with session_name  cache is 297 entries (58133172065 bytes),  marked for eviction 254 entries (49792486655 bytes).
I0000 00



I0000 00:00:1718407766.372614    6911 tpu_compile_op_common.cc:245] Compilation of 3e13a244bdf196a6:0:0 with session name  took 5.952348855s and succeeded
I0000 00:00:1718407766.412313    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e13a244bdf196a6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11784464_446266467571217634", property.function_library_fingerprint = 16854917606362358817, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718407766.412613    6911 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7599303722381592 and accuracy: 0.8172268867492676


[I 2024-06-14 23:29:59,825] Trial 426 finished with value: 0.8172268867492676 and parameters: {'num_epochs': 10, 'dropout_rate': 0.31343859580874156, 'weight_decay': 0.08023133907121566, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.9241934305835757}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/9


I0000 00:00:1718408068.970244    6880 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b583711ca7d49c01:0:0), session_name()
I0000 00:00:1718408116.968627    6880 tpu_compile_op_common.cc:245] Compilation of b583711ca7d49c01:0:0 with session name  took 47.998282461s and succeeded
I0000 00:00:1718408117.215608    6880 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b583711ca7d49c01:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11940953_4946314234194277054", property.function_library_fingerprint = 6555353288862366234, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718408186.544368    6840 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b5602b6bd1418624:0:0), session_name()
I0000 00:00:1718408193.847095    6840 tpu_compile_op_common.cc:245] Compilation of b5602b6bd1418624:0:0 with session name  took 7.302670605s and succeeded
I0000 00:00:1718408193.921417    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b5602b6bd1418624:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11978044_5334771342064158575", property.function_library_fingerprint = 6522800524793824030, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718408199.579497    6925 tpu_compile_op_common.cc:245] Compilation of 1d88675d09a9ff17:0:0 with session name  took 5.511405216s and succeeded
I0000 00:00:1718408199.621577    6925 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1d88675d09a9ff17:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11978044_5334771342064158575", property.function_library_fingerprint = 6522800524793824030, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718408199.621802    6925 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.751958429813385 and accuracy: 0.8403361439704895


[I 2024-06-14 23:37:07,182] Trial 433 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 9, 'dropout_rate': 0.34198570079076396, 'weight_decay': 0.05687842925194484, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.8867023391779962}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/8


I0000 00:00:1718408495.340039    6838 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9ebede48a6d2cb32:0:0), session_name()
I0000 00:00:1718408544.972890    6838 tpu_compile_op_common.cc:245] Compilation of 9ebede48a6d2cb32:0:0 with session name  took 49.63276495s and succeeded
I0000 00:00:1718408545.206766    6838 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ebede48a6d2cb32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12132781_18053408282551415537", property.function_library_fingerprint = 4831936994950930452, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718408589.772336    6892 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f519f7be29847eed:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12132781_18053408282551415537", property.function_library_fingerprint = 4831936994950930452, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718408589.773266    6892 tpu_compilation_cache_interface.cc:541] After adding entry for key f519f7be29847eed:0:0 with session_name  cache is 305 entries (59642402073 bytes),  marked for eviction 261 entries (51108814585 bytes).
I0000 00



I0000 00:00:1718408633.118260    6913 tpu_compile_op_common.cc:245] Compilation of 466fd1ea4fe2e70d:0:0 with session name  took 6.365499934s and succeeded
I0000 00:00:1718408633.166871    6913 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(466fd1ea4fe2e70d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12169872_4499726588275090827", property.function_library_fingerprint = 8247943931566617612, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718408633.167290    6913 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7628425359725952 and accuracy: 0.8492646813392639
Epoch 1/3


I0000 00:00:1718408759.956439    6904 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(66bcf7eee787a751:0:0), session_name()
I0000 00:00:1718408845.918665    6904 tpu_compile_op_common.cc:245] Compilation of 66bcf7eee787a751:0:0 with session name  took 1m25.962134865s and succeeded
I0000 00:00:1718408846.196106    6904 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(66bcf7eee787a751:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12219693_15897115538177036233", property.function_library_fingerprint = 10045512165872506090, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants

Epoch 2/3
Epoch 3/3


2024-06-14 23:49:16.632126: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718408958.713929    6840 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a297a6ec2109c030:0:0), session_name()
I0000 00:00:1718408964.697200    6840 tpu_compile_op_common.cc:245] Compilation of a297a6ec2109c030:0:0 with session name  took 5.983177101s and succeeded
I0000 00:00:1718408964.762132    6840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a297a6ec2109c030:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_8768249220389015480", property.function_library_fingerprint = 11432902337357559362, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718408978.043649    6887 tpu_compile_op_common.cc:245] Compilation of 3562dc0aa0ff7226:0:0 with session name  took 5.976435182s and succeeded
I0000 00:00:1718408978.085059    6887 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3562dc0aa0ff7226:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12257856_14596151498467827858", property.function_library_fingerprint = 11432902337357559362, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,42,;24,42,;24,42,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718408978.085286    6887 tpu_compilation_cache_interface.cc:541] After adding entry 

File ./saved_models/disaster_FastTPU_0_model_trial_380_accuracy_0.8482_avg_score_0.8044_f1_0.7606_submission.csv has been removed.


[I 2024-06-14 23:49:39,132] Trial 440 finished with value: 0.8492646813392639 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2649384712591288, 'weight_decay': 0.08936626172838895, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.7508188710008451}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10


I0000 00:00:1718409250.795015    6852 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(252f8bc500e7ea7b:0:0), session_name()
I0000 00:00:1718409296.693032    6852 tpu_compile_op_common.cc:245] Compilation of 252f8bc500e7ea7b:0:0 with session name  took 45.89794151s and succeeded
I0000 00:00:1718409296.884934    6852 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(252f8bc500e7ea7b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12396421_8619974840540052254", property.function_library_fingerprint = 15660631978414658408, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_siz



I0000 00:00:1718409367.844992    6886 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ce1b5178bbeba382:0:0), session_name()
I0000 00:00:1718409375.218009    6886 tpu_compile_op_common.cc:245] Compilation of ce1b5178bbeba382:0:0 with session name  took 7.371920936s and succeeded
I0000 00:00:1718409375.281625    6886 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ce1b5178bbeba382:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12433470_1986064798711167092", property.function_library_fingerprint = 4133857368372047146, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,42,;32,42,;32,42,;32,;", property.guaranteed_constants_size



I0000 00:00:1718409381.401190    6911 tpu_compile_op_common.cc:245] Compilation of b98e0c55a46f04be:0:0 with session name  took 5.986882308s and succeeded
I0000 00:00:1718409381.453695    6911 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b98e0c55a46f04be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12433470_1986064798711167092", property.function_library_fingerprint = 4133857368372047146, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718409381.454079    6911 tpu_compilation_cache_interface.cc:541] After adding entr

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7777811884880066 and accuracy: 0.8371848464012146


[I 2024-06-14 23:56:54,549] Trial 453 finished with value: 0.8371848464012146 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2532729682225697, 'weight_decay': 0.017040461682841547, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.28959612104349025}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12


I0000 00:00:1718409679.434436    6915 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(22ec8a39e76bf19a:0:0), session_name()
I0000 00:00:1718409728.664563    6915 tpu_compile_op_common.cc:245] Compilation of 22ec8a39e76bf19a:0:0 with session name  took 49.230063437s and succeeded
I0000 00:00:1718409728.886465    6915 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(22ec8a39e76bf19a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12589801_597226608476809042", property.function_library_fingerprint = 8063139228806171884, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,43,;32,43,;32,43,;32,;", property.guaranteed_constants_size



I0000 00:00:1718409772.948664    6863 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2eda59781d1b4f15:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12589801_597226608476809042", property.function_library_fingerprint = 8063139228806171884, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,43,;10,43,;10,43,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718409772.949350    6863 tpu_compilation_cache_interface.cc:541] After adding entry for key 2eda59781d1b4f15:0:0 with session_name  cache is 318 entries (62219634649 bytes),  marked for eviction 275 entries (53878551805 bytes).
I0000 00:0



I0000 00:00:1718409813.759538    6902 tpu_compile_op_common.cc:245] Compilation of 5c109d289344c0de:0:0 with session name  took 6.093938625s and succeeded
I0000 00:00:1718409813.800502    6902 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c109d289344c0de:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12626846_13512230101100902814", property.function_library_fingerprint = 15548587516334063753, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,42,;14,42,;14,42,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718409813.800793    6902 tpu_compilation_cache_interface.cc:541] After adding en

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7809222340583801 and accuracy: 0.8445377945899963


[I 2024-06-15 00:04:11,207] Trial 461 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 12, 'dropout_rate': 0.2872984379523719, 'weight_decay': 0.06274047389196817, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.9797803930104327}. Best is trial 232 with value: 0.855567216873169.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/12
