# This notebook switches from using the BERT model to using the RoBERTa model. But I got better results from BERT, so future models use BERT instead of RoBERTa

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nlp-getting-started/sample_submission.csv
/kaggle/input/nlp-getting-started/train.csv
/kaggle/input/nlp-getting-started/test.csv
/kaggle/input/certification/BaltimoreCyberTrustRoot.crt.pem


In [2]:
import numpy as np
import pandas as pd
import random
import os
import re
import json
from transformers import set_seed, RobertaTokenizer, TFRobertaForSequenceClassification, RobertaConfig
import tensorflow as tf
%pip install optuna
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)
set_seed(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Install necessary packages for Azure SQL connection
%pip install mysql-connector-python 
%pip install PyMySQL

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress other warnings (commented out for now)
# warnings.filterwarnings('ignore')

# Suppress absl TPU cache logging
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Additional configuration to suppress specific TPU cache logs
tf.autograph.set_verbosity(3)
tf.get_logger().propagate = False

# Load the training data
train_data = pd.read_csv('/kaggle/input/nlp-getting-started/train.csv')
kaggle_test_data = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Split the data into 75% training and 25% validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42, stratify=train_data['target'])

# Clean the text data
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)     # Remove mentions
    text = re.sub(r'\d+', '', text)      # Remove numbers
    text = re.sub(r'[^\w\s#]', '', text)  # Remove punctuation except hashtags
    #text = text.lower()                  # Convert to lowercase
    return text  # Do not convert to lowercase for RoBERTa

train_data['clean_text'] = train_data['text'].apply(clean_text)
val_data['clean_text'] = val_data['text'].apply(clean_text)
kaggle_test_data['clean_text'] = kaggle_test_data['text'].apply(clean_text)

# Tokenize the text data
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

def tokenize_texts(texts):
    return tokenizer(
        texts.tolist(),
        max_length=64,
        padding=True,
        truncation=True,
        return_tensors='tf'
    )

# Encode the clean text data
train_encodings = tokenize_texts(train_data['clean_text'])
val_encodings = tokenize_texts(val_data['clean_text'])
kaggle_test_encodings = tokenize_texts(kaggle_test_data['clean_text'])

train_labels = tf.convert_to_tensor(train_data['target'].values)
val_labels = tf.convert_to_tensor(val_data['target'].values)

def compute_metrics(predictions, labels):
    predictions = np.argmax(predictions, axis=1)
    f1 = f1_score(labels, predictions)
    accuracy = accuracy_score(labels, predictions)
    return {'f1': f1, 'accuracy': accuracy}

def create_tf_dataset(encodings, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((encodings, labels))
    dataset = dataset.cache()  # Cache the dataset
    dataset = dataset.shuffle(10000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)  # Prefetching
    return dataset

# Initialize TPU
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    tpu_cores = strategy.num_replicas_in_sync
    print(f"TPU cores available: {tpu_cores}")
except ValueError:
    print("TPU not found")
    raise SystemExit

# Directory to save models
model_save_dir = './saved_models'
os.makedirs(model_save_dir, exist_ok=True)

# File to store top 5 model predictions
top_predictions_file = 'top_5_predictions.json'

# Load existing top 5 predictions
if os.path.exists(top_predictions_file):
    with open(top_predictions_file, 'r') as file:
        top_predictions = json.load(file)
else:
    top_predictions = []

# Function to save top predictions
def save_top_predictions(predictions_file, val_accuracy, model_number, phase):
    global top_predictions
    
    new_entry = {
        'model_number': model_number,
        'val_accuracy': val_accuracy,
        'predictions_file': predictions_file,
        'phase': phase
    }
    
    # Add the new entry and sort by validation accuracy
    top_predictions.append(new_entry)
    top_predictions = sorted(top_predictions, key=lambda x: x['val_accuracy'], reverse=True)
    
    # If there are more than 5 entries, remove the one with the lowest accuracy
    if len(top_predictions) > 5:
        removed_entry = top_predictions.pop()
        # Check if the file exists before attempting to remove it
        if os.path.exists(removed_entry['predictions_file']):
            os.remove(removed_entry['predictions_file'])
            print(f"File {removed_entry['predictions_file']} ({removed_entry['phase']}) has been removed.")
        else:
            print(f"File {removed_entry['predictions_file']} ({removed_entry['phase']}) does not exist and cannot be removed.")
    
    # Save the updated top predictions to file
    with open(top_predictions_file, 'w') as file:
        json.dump(top_predictions, file, indent=4)

# Set fixed batch size and learning rate parameters
base_learning_rate = 1e-5
batch_size_per_core = 32
tpu_cores = 8
batch_size = batch_size_per_core * tpu_cores
learning_rate = base_learning_rate * (batch_size / (batch_size_per_core * tpu_cores))

# Create the datasets outside the objective function
train_dataset = create_tf_dataset(dict(train_encodings), train_labels, batch_size)
val_dataset = create_tf_dataset(dict(val_encodings), val_labels, batch_size)
kaggle_test_dataset = tf.data.Dataset.from_tensor_slices(dict(kaggle_test_encodings)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

fine_tune_encodings = tokenize_texts(val_data['clean_text'])
fine_tune_labels = tf.convert_to_tensor(val_data['target'].values)
fine_tune_dataset = tf.data.Dataset.from_tensor_slices((
    dict(fine_tune_encodings),
    fine_tune_labels
)).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE).cache()

# Track top 5 models
def objective(trial):
    num_epochs = trial.suggest_int("num_epochs", 1, 20)
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    lr_scheduler_type = trial.suggest_categorical("lr_scheduler_type", ["constant", "linear", "cosine", "cosine_with_restarts"])
    gradient_clip_norm = trial.suggest_float("gradient_clip_norm", 0.0, 1.0)

    with strategy.scope():
        precision = tf.keras.metrics.Precision()
        recall = tf.keras.metrics.Recall()

        def f1_score_custom(y_true, y_pred):
            # Convert logits to predicted labels
            y_pred = tf.argmax(y_pred, axis=1)
            
            # Ensure true labels are in integer format
            y_true = tf.cast(y_true, tf.int64)
            
            # Update the state of precision and recall
            precision.update_state(y_true, y_pred)
            recall.update_state(y_true, y_pred)
            
            # Compute precision and recall values
            precision_result = precision.result()
            recall_result = recall.result()
            
            # Compute F1 score
            f1 = 2 * ((precision_result * recall_result) / (precision_result + recall_result + tf.keras.backend.epsilon()))
            
            return f1

        config = RobertaConfig.from_pretrained('roberta-base', num_labels=2, hidden_dropout_prob=dropout_rate)
        model = TFRobertaForSequenceClassification.from_pretrained('roberta-base', config=config)

        if lr_scheduler_type == "linear":
            lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000,
                end_learning_rate=0.0,
                power=1.0
            )
        elif lr_scheduler_type == "cosine":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
                initial_learning_rate=learning_rate,
                decay_steps=10000
            )
        elif lr_scheduler_type == "cosine_with_restarts":
            lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
                initial_learning_rate=learning_rate,
                first_decay_steps=1000
            )
        else:
            lr_schedule = learning_rate

        optimizer = tf.keras.optimizers.experimental.AdamW(
            learning_rate=lr_schedule,
            weight_decay=weight_decay,
            epsilon=1e-8,
            clipnorm=gradient_clip_norm
        )

        model.compile(
            optimizer=optimizer, 
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy'), f1_score_custom],
            steps_per_execution=32  # Experiment with different values like 16, 32, 64
        )

    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)

    # Evaluate on validation set
    val_loss, val_accuracy, val_f1_score = model.evaluate(val_dataset, verbose=1)
    print(f"f1 score: {val_f1_score} and accuracy: {val_accuracy}")
    
    avg_score = (val_accuracy + val_f1_score) / 2

    # Save pre-fine-tuning predictions
    if len(top_predictions) < 5 or val_accuracy > min(top_predictions, key=lambda x: x['val_accuracy'])['val_accuracy']:
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()
        pre_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_pre_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(pre_fine_tuning_predictions_file, index=False)
        save_top_predictions(pre_fine_tuning_predictions_file, val_accuracy, trial.number, 'pre_fine_tuning')

        # Fine-tuning step
        training_data_size = len(train_data)
        fine_tune_data_size = len(val_data)
        fine_tune_epochs = max(1, round((fine_tune_data_size / training_data_size) * num_epochs))

        model.fit(fine_tune_dataset, epochs=fine_tune_epochs, verbose=1)

        # Make post-fine-tuning predictions on the Kaggle test dataset
        kaggle_test_predictions = model.predict(kaggle_test_dataset).logits
        kaggle_test_predicted_labels = tf.argmax(kaggle_test_predictions, axis=1).numpy()
        post_fine_tuning_predictions_file = os.path.join(model_save_dir, f"{studyName}_model_trial_{trial.number}_accuracy_{val_accuracy:.4f}_avg_score_{avg_score:.4f}_f1_{val_f1_score:.4f}_post_fine_tuning_submission.csv") 
        submission = pd.DataFrame({'id': kaggle_test_data['id'], 'target': kaggle_test_predicted_labels})
        submission.to_csv(post_fine_tuning_predictions_file, index=False)

        # Save the predictions and accuracy to the top 5 list
        save_top_predictions(post_fine_tuning_predictions_file, val_accuracy, trial.number, 'post_fine_tuning')

    return val_accuracy

# Define your Optuna study, using the MySQL connection string
optuna_storage = 'mysql+pymysql://<username>:<password>@<host>/<database>?ssl_ca=<path_to_CA_cert>&ssl_verify_cert=true'

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
db_password = user_secrets.get_secret("DB_PASSWORD")# This uses the secrets inside of Kaggle so I don't have to explicitly type my password out in code

# Example with your details (replace '<password>' with your real password and '<database>' with your database name)
optuna_storage = f'mysql+pymysql://MichaelAzure:{db_password}@kaggle-third-sql.mysql.database.azure.com/kaggle_disaster_database?ssl_ca=/kaggle/input/certification&ssl_verify_cert=true'

studyName = 'disaster_RoBerta_1'
study = optuna.create_study(study_name=studyName, # name of the study
                            storage=optuna_storage,  # URL for the mySQL schema
                            direction='maximize', # maximize the log loss
                            load_if_exists=True, # makes it so that if the study_name already exists in the schema, then it will append the new trials with the old trials and essentially resume the study. It will also remember the previous trials so it really is resuming the study
                            )

study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save top predictions JSON file to output directory for later use
output_top_predictions_file = '/kaggle/working/top_5_predictions.json'
with open(output_top_predictions_file, 'w') as file:
    json.dump(top_predictions, file, indent=4)

print(f"Top 5 predictions saved to {output_top_predictions_file}")


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1718814507.454146      30 common_lib.cc:798] Could not set metric server port: INVALID_ARGUMENT: Could not find SliceBuilder port 8471 in any of the 0 ports provided in `tpu_process_addresses`="localhost"
=== Source Location Trace: === 
learning/45eac/tfrc/runtime/common_lib.cc:479
D0619 16:28:27.462876879      30 config.cc:196]                        gRPC EXPERIMENT call_status_override_on_cancellation   OFF (default:OFF)
D0619 16:28:27.462893869      30 config.cc:196]                        gRPC EXPERIMENT call_v3                                OFF (default:OFF)
D0619 16:28:27.462897422      30 config.cc:196]                        gRPC EXPERIMENT canary_client_privacy                  ON  (default:ON)
D0619 16:28:27.462900018      30 config.cc:196]                        gRPC EXPERIMENT capture_base_context                   ON  (default:ON)
D0619 16:28:27.462902560      30 config.cc:196]                        gRPC EXPER

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting colorlog
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting sqlalchemy>=1.3.0
  Downloading SQLAlchemy-2.0.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting alembic>=1.5.0
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
Collecting Mako
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting greenlet!=0.4.17
  Downloading greenlet

I0000 00:00:1718814540.299659      30 service.cc:145] XLA service 0x570ab129bcf0 initialized for platform TPU (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1718814540.299716      30 service.cc:153]   StreamExecutor device (0): TPU, 2a886c8
I0000 00:00:1718814540.299721      30 service.cc:153]   StreamExecutor device (1): TPU, 2a886c8
I0000 00:00:1718814540.299724      30 service.cc:153]   StreamExecutor device (2): TPU, 2a886c8
I0000 00:00:1718814540.299727      30 service.cc:153]   StreamExecutor device (3): TPU, 2a886c8
I0000 00:00:1718814540.299729      30 service.cc:153]   StreamExecutor device (4): TPU, 2a886c8
I0000 00:00:1718814540.299732      30 service.cc:153]   StreamExecutor device (5): TPU, 2a886c8
I0000 00:00:1718814540.299735      30 service.cc:153]   StreamExecutor device (6): TPU, 2a886c8
I0000 00:00:1718814540.299737      30 service.cc:153]   StreamExecutor device (7): TPU, 2a886c8


TPU cores available: 8


[I 2024-06-19 16:29:08,152] Using an existing study with name 'disaster_RoBerta_1' instead of creating a new one.
I0000 00:00:1718814550.269960      30 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaForSequenceClassification were



I0000 00:00:1718814908.926796     866 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcd29aecfe6c0cdd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_133130_8566221946827727408", property.function_library_fingerprint = 9445532351988877056, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718814908.927745     866 tpu_compilation_cache_interface.cc:541] After adding entry for key fcd29aecfe6c0cdd:0:0 with session_name  cache is 2 entries (633823468 bytes),  marked for eviction 0 entries (0 bytes).
I0000 00:00:1718814935.479834     



I0000 00:00:1718814946.570073     802 tpu_compile_op_common.cc:245] Compilation of 95a69e14b1ee16b2:0:0 with session name  took 5.176770367s and succeeded
I0000 00:00:1718814946.609859     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(95a69e14b1ee16b2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_169638_12831957828625493100", property.function_library_fingerprint = 5053152750684246245, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718814946.610020     802 tpu_compilation_cache_interface.cc:541] After adding entry for ke

f1 score: 0.5485469102859497 and accuracy: 0.819327712059021


2024-06-19 16:36:07.774449: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718814969.485684     791 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d5bc4dc8aa8af78b:0:0), session_name()
I0000 00:00:1718814973.810041     791 tpu_compile_op_common.cc:245] Compilation of d5bc4dc8aa8af78b:0:0 with session name  took 4.324313799s and succeeded
I0000 00:00:1718814973.837646     791 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d5bc4dc8aa8af78b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_16390557141864835095", property.function_library_fingerprint = 17925840106691722244, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, top



I0000 00:00:1718814984.164888     793 tpu_compile_op_common.cc:245] Compilation of 25678676ebe48c41:0:0 with session name  took 5.21611472s and succeeded
I0000 00:00:1718814984.205839     793 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(25678676ebe48c41:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_180882_3710106820593859541", property.function_library_fingerprint = 17925840106691722244, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718814984.206006     793 tpu_compilation_cache_interface.cc:541] After adding entry for key 256



[I 2024-06-19 16:40:21,950] Trial 1 finished with value: 0.819327712059021 and parameters: {'num_epochs': 1, 'dropout_rate': 0.1001234773292314, 'weight_decay': 0.07966206782315433, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.5502151116915911}. Best is trial 1 with value: 0.819327712059021.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRo

Epoch 1/8


I0000 00:00:1718815470.552314     802 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7f5e0003677b2d8:0:0), session_name()
I0000 00:00:1718815512.614279     802 tpu_compile_op_common.cc:245] Compilation of 7f5e0003677b2d8:0:0 with session name  took 42.061918501s and succeeded
I0000 00:00:1718815512.830821     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7f5e0003677b2d8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_381195_13839678496911267320", property.function_library_fingerprint = 15440065135030615472, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, emb



I0000 00:00:1718815576.148625     831 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e016b091411f56d1:0:0), session_name()
I0000 00:00:1718815581.735778     831 tpu_compile_op_common.cc:245] Compilation of e016b091411f56d1:0:0 with session name  took 5.587109561s and succeeded
I0000 00:00:1718815581.786259     831 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e016b091411f56d1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_417765_10476477814255077574", property.function_library_fingerprint = 13339773585768346446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718815587.141712     815 tpu_compile_op_common.cc:245] Compilation of aa6963f0a5baf9e:0:0 with session name  took 5.259128782s and succeeded
I0000 00:00:1718815587.186964     815 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aa6963f0a5baf9e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_417765_10476477814255077574", property.function_library_fingerprint = 13339773585768346446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718815587.187148     815 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.5539930462837219 and accuracy: 0.8103991746902466


2024-06-19 16:47:13.784467: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718815635.499263     780 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ca8c4b16a8f026a3:0:0), session_name()
I0000 00:00:1718815639.969737     780 tpu_compile_op_common.cc:245] Compilation of ca8c4b16a8f026a3:0:0 with session name  took 4.470427526s and succeeded
I0000 00:00:1718815639.997904     780 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ca8c4b16a8f026a3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_6474136847988158557", property.function_library_fingerprint = 3357120869496404520, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718815650.468658     840 tpu_compile_op_common.cc:245] Compilation of ac0a651c2e7fd801:0:0 with session name  took 5.1653478s and succeeded
I0000 00:00:1718815650.500799     840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ac0a651c2e7fd801:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_440601_14192563863855817356", property.function_library_fingerprint = 3357120869496404520, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718815650.500992     840 tpu_compilation_cache_interface.cc:541] After adding entry for key ac0a

Epoch 1/3


I0000 00:00:1718815744.479403     863 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1281110d5b50ad4c:0:0), session_name()
I0000 00:00:1718815814.802906     863 tpu_compile_op_common.cc:245] Compilation of 1281110d5b50ad4c:0:0 with session name  took 1m10.323458371s and succeeded
I0000 00:00:1718815815.082129     863 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1281110d5b50ad4c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_475486_2470948056654458598", property.function_library_fingerprint = 18087840955465010121, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0,

Epoch 2/3
Epoch 3/3


[I 2024-06-19 16:51:29,786] Trial 6 finished with value: 0.8103991746902466 and parameters: {'num_epochs': 8, 'dropout_rate': 0.4035718031971067, 'weight_decay': 0.02076502127355433, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.9451935784290166}. Best is trial 0 with value: 0.831932783126831.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2

Epoch 1/11


I0000 00:00:1718816144.724026     836 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(37bc9fa159fba2ef:0:0), session_name()
I0000 00:00:1718816187.680416     836 tpu_compile_op_common.cc:245] Compilation of 37bc9fa159fba2ef:0:0 with session name  took 42.956342356s and succeeded
I0000 00:00:1718816187.868634     836 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(37bc9fa159fba2ef:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_641984_14772990174347223246", property.function_library_fingerprint = 18286547968280593027, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718816252.881199     836 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ab72599431599db4:0:0), session_name()
I0000 00:00:1718816258.563899     836 tpu_compile_op_common.cc:245] Compilation of ab72599431599db4:0:0 with session name  took 5.682616163s and succeeded
I0000 00:00:1718816258.617607     836 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ab72599431599db4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_678492_11997088289921653474", property.function_library_fingerprint = 11582606452649202512, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718816264.045872     851 tpu_compile_op_common.cc:245] Compilation of ecf027811888c019:0:0 with session name  took 5.32501072s and succeeded
I0000 00:00:1718816264.090786     851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ecf027811888c019:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_678492_11997088289921653474", property.function_library_fingerprint = 11582606452649202512, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718816264.090979     851 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7443740963935852 and accuracy: 0.8492646813392639


2024-06-19 16:58:41.002026: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718816322.712156     844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(af12b60f985e82dd:0:0), session_name()
I0000 00:00:1718816327.212396     844 tpu_compile_op_common.cc:245] Compilation of af12b60f985e82dd:0:0 with session name  took 4.500194498s and succeeded
I0000 00:00:1718816327.241746     844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(af12b60f985e82dd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_14665994393527413149", property.function_library_fingerprint = 1472370995321897140, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718816337.776462     872 tpu_compile_op_common.cc:245] Compilation of f04b7d0e961ea8f3:0:0 with session name  took 4.951084286s and succeeded
I0000 00:00:1718816337.818823     872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f04b7d0e961ea8f3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_706296_5452134759278027206", property.function_library_fingerprint = 1472370995321897140, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718816337.819003     872 tpu_compilation_cache_interface.cc:541] After adding entry for key f04

Epoch 1/4


I0000 00:00:1718816430.325377     821 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(49aa9101e7ace44:0:0), session_name()
I0000 00:00:1718816501.998681     821 tpu_compile_op_common.cc:245] Compilation of 49aa9101e7ace44:0:0 with session name  took 1m11.673257215s and succeeded
I0000 00:00:1718816502.247552     821 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(49aa9101e7ace44:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_741123_13169685857611788668", property.function_library_fingerprint = 10382426382761694053, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e

Epoch 2/4
Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_RoBerta_1_model_trial_6_accuracy_0.8104_avg_score_0.6822_f1_0.5540_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 17:03:00,127] Trial 12 finished with value: 0.8492646813392639 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2876498437642344, 'weight_decay': 0.08121539060377789, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.34971813637624216}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/13


I0000 00:00:1718816839.340479     844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2a94356a63266fc0:0:0), session_name()
I0000 00:00:1718816881.678112     844 tpu_compile_op_common.cc:245] Compilation of 2a94356a63266fc0:0:0 with session name  took 42.337589721s and succeeded
I0000 00:00:1718816881.906694     844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a94356a63266fc0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_908197_5149826500262130158", property.function_library_fingerprint = 18432258350269012582, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718816947.143702     833 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(18678ce07f6a76d8:0:0), session_name()
I0000 00:00:1718816952.988696     833 tpu_compile_op_common.cc:245] Compilation of 18678ce07f6a76d8:0:0 with session name  took 5.844946871s and succeeded
I0000 00:00:1718816953.031547     833 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(18678ce07f6a76d8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_944721_5553284703839059965", property.function_library_fingerprint = 11720041187591980439, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718816958.724349     861 tpu_compile_op_common.cc:245] Compilation of 325f1c9c896bbb43:0:0 with session name  took 5.597216675s and succeeded
I0000 00:00:1718816958.763502     861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(325f1c9c896bbb43:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_944721_5553284703839059965", property.function_library_fingerprint = 11720041187591980439, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718816958.763684     861 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.6878683567047119 and accuracy: 0.8293067216873169


2024-06-19 17:10:23.933671: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718817025.622153     849 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9ad26c25a0d8c28:0:0), session_name()
I0000 00:00:1718817030.195573     849 tpu_compile_op_common.cc:245] Compilation of 9ad26c25a0d8c28:0:0 with session name  took 4.573381059s and succeeded
I0000 00:00:1718817030.227097     849 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ad26c25a0d8c28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9066967840513109860", property.function_library_fingerprint = 7697494552238739060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology



I0000 00:00:1718817041.387955     785 tpu_compile_op_common.cc:245] Compilation of 25e59a8d8174672e:0:0 with session name  took 5.471439504s and succeeded
I0000 00:00:1718817041.426563     785 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(25e59a8d8174672e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_975837_6386167621240415392", property.function_library_fingerprint = 7697494552238739060, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718817041.426741     785 tpu_compilation_cache_interface.cc:541] After adding entry for key 25e

File ./saved_models/disaster_RoBerta_1_model_trial_6_accuracy_0.8104_avg_score_0.6822_f1_0.5540_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/4


I0000 00:00:1718817138.605113     856 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(92e4fc7c77d226f2:0:0), session_name()
I0000 00:00:1718817214.765986     856 tpu_compile_op_common.cc:245] Compilation of 92e4fc7c77d226f2:0:0 with session name  took 1m16.160824155s and succeeded
I0000 00:00:1718817215.036654     856 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(92e4fc7c77d226f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1010680_18163890387715439901", property.function_library_fingerprint = 9335974906762127009, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0

Epoch 2/4
Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_RoBerta_1_model_trial_1_accuracy_0.8193_avg_score_0.6839_f1_0.5485_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 17:14:47,720] Trial 18 finished with value: 0.8293067216873169 and parameters: {'num_epochs': 13, 'dropout_rate': 0.3707626139354483, 'weight_decay': 0.09917245841046689, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.7360638342260294}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TF

Epoch 1/15


I0000 00:00:1718817547.280858     804 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(26f78b39645be65f:0:0), session_name()
I0000 00:00:1718817589.476370     804 tpu_compile_op_common.cc:245] Compilation of 26f78b39645be65f:0:0 with session name  took 42.195442128s and succeeded
I0000 00:00:1718817589.708757     804 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(26f78b39645be65f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1177714_7954966154539947719", property.function_library_fingerprint = 16466529012194739609, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718817655.167569     789 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(42bfc83a1df601de:0:0), session_name()
I0000 00:00:1718817660.797763     789 tpu_compile_op_common.cc:245] Compilation of 42bfc83a1df601de:0:0 with session name  took 5.630153419s and succeeded
I0000 00:00:1718817660.846164     789 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(42bfc83a1df601de:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1214222_7196781882570507181", property.function_library_fingerprint = 12194331435737988877, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718817666.819999     809 tpu_compile_op_common.cc:245] Compilation of 8cf9362cc679630c:0:0 with session name  took 5.879440635s and succeeded
I0000 00:00:1718817666.866783     809 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8cf9362cc679630c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1214222_7196781882570507181", property.function_library_fingerprint = 12194331435737988877, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718817666.867080     809 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
f1 score: 0.7880714535713196 and accuracy: 0.8392857313156128


2024-06-19 17:22:17.292978: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718817738.984491     867 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1382a8f7cca58039:0:0), session_name()
I0000 00:00:1718817743.762990     867 tpu_compile_op_common.cc:245] Compilation of 1382a8f7cca58039:0:0 with session name  took 4.778452553s and succeeded
I0000 00:00:1718817743.790091     867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1382a8f7cca58039:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_12263289092565295394", property.function_library_fingerprint = 410661386158826171, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topol



I0000 00:00:1718817754.470601     825 tpu_compile_op_common.cc:245] Compilation of 25cd43925b1409ee:0:0 with session name  took 5.061192888s and succeeded
I0000 00:00:1718817754.510496     825 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(25cd43925b1409ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1248650_17563233441513170375", property.function_library_fingerprint = 410661386158826171, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718817754.510667     825 tpu_compilation_cache_interface.cc:541] After adding entry for key 25

File ./saved_models/disaster_RoBerta_1_model_trial_1_accuracy_0.8193_avg_score_0.6839_f1_0.5485_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/5


I0000 00:00:1718817848.917146     836 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(70f6eb7e559733f2:0:0), session_name()
I0000 00:00:1718817922.908323     836 tpu_compile_op_common.cc:245] Compilation of 70f6eb7e559733f2:0:0 with session name  took 1m13.991106694s and succeeded
I0000 00:00:1718817923.182174     836 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(70f6eb7e559733f2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1283477_2549159098596776549", property.function_library_fingerprint = 13986654455773365622, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
File ./saved_models/disaster_RoBerta_1_model_trial_18_accuracy_0.8293_avg_score_0.7586_f1_0.6879_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 17:26:39,073] Trial 24 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 15, 'dropout_rate': 0.23736454318054873, 'weight_decay': 0.00787594172523677, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.41933156673054117}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/6


I0000 00:00:1718818258.048551     848 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8e84a50bb92aa641:0:0), session_name()
I0000 00:00:1718818302.054176     848 tpu_compile_op_common.cc:245] Compilation of 8e84a50bb92aa641:0:0 with session name  took 44.005573836s and succeeded
I0000 00:00:1718818302.263947     848 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8e84a50bb92aa641:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1451077_227125837278801260", property.function_library_fingerprint = 17079171961109028967, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718818365.156803     864 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5b4b8ddb6590e3db:0:0), session_name()
I0000 00:00:1718818370.759479     864 tpu_compile_op_common.cc:245] Compilation of 5b4b8ddb6590e3db:0:0 with session name  took 5.602631974s and succeeded
I0000 00:00:1718818370.806206     864 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5b4b8ddb6590e3db:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1487585_13699189767482419635", property.function_library_fingerprint = 6268874214404937735, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718818376.251890     827 tpu_compile_op_common.cc:245] Compilation of 1fa1223b96b666a3:0:0 with session name  took 5.318459919s and succeeded
I0000 00:00:1718818376.297946     827 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1fa1223b96b666a3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1487585_13699189767482419635", property.function_library_fingerprint = 6268874214404937735, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718818376.298192     827 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.6737608313560486 and accuracy: 0.8303571343421936


2024-06-19 17:33:36.470365: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718818418.184918     794 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(37919a9dcb688c3c:0:0), session_name()
I0000 00:00:1718818422.634780     794 tpu_compile_op_common.cc:245] Compilation of 37919a9dcb688c3c:0:0 with session name  took 4.44981796s and succeeded
I0000 00:00:1718818422.661170     794 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(37919a9dcb688c3c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_10947096422261478807", property.function_library_fingerprint = 10157031308701604516, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718818433.922055     812 tpu_compile_op_common.cc:245] Compilation of c472571ab95e31d7:0:0 with session name  took 5.36647252s and succeeded
I0000 00:00:1718818433.960119     812 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c472571ab95e31d7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1507109_6546359728656035936", property.function_library_fingerprint = 10157031308701604516, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718818433.960343     812 tpu_compilation_cache_interface.cc:541] After adding entry for key c4

File ./saved_models/disaster_RoBerta_1_model_trial_18_accuracy_0.8293_avg_score_0.7586_f1_0.6879_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/2


I0000 00:00:1718818530.189024     834 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eb309d91a8fb7cd3:0:0), session_name()
I0000 00:00:1718818598.434667     834 tpu_compile_op_common.cc:245] Compilation of eb309d91a8fb7cd3:0:0 with session name  took 1m8.245591617s and succeeded
I0000 00:00:1718818598.654479     834 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eb309d91a8fb7cd3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1541936_14764374915443972676", property.function_library_fingerprint = 13219611676059590282, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0

Epoch 2/2
File ./saved_models/disaster_RoBerta_1_model_trial_32_accuracy_0.8304_avg_score_0.7521_f1_0.6738_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 17:37:55,362] Trial 32 finished with value: 0.8303571343421936 and parameters: {'num_epochs': 6, 'dropout_rate': 0.31459699824858467, 'weight_decay': 0.06313874383125463, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.1807847791735933}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model 

Epoch 1/11


I0000 00:00:1718818933.101949     794 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2eea0dde8c1e08ff:0:0), session_name()
I0000 00:00:1718818976.679862     794 tpu_compile_op_common.cc:245] Compilation of 2eea0dde8c1e08ff:0:0 with session name  took 43.577866474s and succeeded
I0000 00:00:1718818976.910169     794 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2eea0dde8c1e08ff:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1707862_1242786362601610778", property.function_library_fingerprint = 712055650795316848, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718819040.524494     873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd19028b77f3f305:0:0), session_name()
I0000 00:00:1718819046.120884     873 tpu_compile_op_common.cc:245] Compilation of fd19028b77f3f305:0:0 with session name  took 5.596331812s and succeeded
I0000 00:00:1718819046.168055     873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd19028b77f3f305:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1744390_9630100496399350170", property.function_library_fingerprint = 7915957119068855236, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718819052.044715     812 tpu_compile_op_common.cc:245] Compilation of 3018181c91b1857b:0:0 with session name  took 5.778512096s and succeeded
I0000 00:00:1718819052.089089     812 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3018181c91b1857b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1744390_9630100496399350170", property.function_library_fingerprint = 7915957119068855236, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718819052.089337     812 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7440685629844666 and accuracy: 0.8461134433746338


2024-06-19 17:45:09.375858: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718819111.119816     838 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6981482544f13d4b:0:0), session_name()
I0000 00:00:1718819115.447317     838 tpu_compile_op_common.cc:245] Compilation of 6981482544f13d4b:0:0 with session name  took 4.32745479s and succeeded
I0000 00:00:1718819115.473311     838 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6981482544f13d4b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_4151192373718592912", property.function_library_fingerprint = 8358958851037509153, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolo



I0000 00:00:1718819126.413704     860 tpu_compile_op_common.cc:245] Compilation of 2a546304c179c900:0:0 with session name  took 5.432347338s and succeeded
I0000 00:00:1718819126.451881     860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2a546304c179c900:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1772194_18077743453150377189", property.function_library_fingerprint = 8358958851037509153, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718819126.452062     860 tpu_compilation_cache_interface.cc:541] After adding entry for key 2

File ./saved_models/disaster_RoBerta_1_model_trial_32_accuracy_0.8304_avg_score_0.7521_f1_0.6738_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/4


I0000 00:00:1718819221.425572     830 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1e96cc6722f6956e:0:0), session_name()
I0000 00:00:1718819299.360912     830 tpu_compile_op_common.cc:245] Compilation of 1e96cc6722f6956e:0:0 with session name  took 1m17.935282189s and succeeded
I0000 00:00:1718819299.637846     830 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1e96cc6722f6956e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1807041_6309399949067933893", property.function_library_fingerprint = 14901206844043475013, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0

Epoch 2/4
Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_RoBerta_1_model_trial_24_accuracy_0.8393_avg_score_0.8137_f1_0.7881_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 17:49:44,388] Trial 38 finished with value: 0.8461134433746338 and parameters: {'num_epochs': 11, 'dropout_rate': 0.28155048195408194, 'weight_decay': 0.07998327704497514, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5583687932900245}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model T

Epoch 1/8


I0000 00:00:1718819642.954437     863 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(712993d8e5a2bf06:0:0), session_name()
I0000 00:00:1718819685.101123     863 tpu_compile_op_common.cc:245] Compilation of 712993d8e5a2bf06:0:0 with session name  took 42.146621291s and succeeded
I0000 00:00:1718819685.283541     863 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(712993d8e5a2bf06:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_1974293_8033561764247981436", property.function_library_fingerprint = 11216472005761632045, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718819754.726307     786 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(85e60df0144ad97:0:0), session_name()
I0000 00:00:1718819761.034906     786 tpu_compile_op_common.cc:245] Compilation of 85e60df0144ad97:0:0 with session name  took 6.308542655s and succeeded
I0000 00:00:1718819761.097201     786 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(85e60df0144ad97:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2010863_7912925151138109945", property.function_library_fingerprint = 12351699921143827694, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, embe



I0000 00:00:1718819766.697697     851 tpu_compile_op_common.cc:245] Compilation of bf9d3cf6bee83ca2:0:0 with session name  took 5.500070158s and succeeded
I0000 00:00:1718819766.746424     851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bf9d3cf6bee83ca2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2010863_7912925151138109945", property.function_library_fingerprint = 12351699921143827694, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718819766.746655     851 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.6812219023704529 and accuracy: 0.8266806602478027


[I 2024-06-19 17:56:34,802] Trial 47 finished with value: 0.8266806602478027 and parameters: {'num_epochs': 8, 'dropout_rate': 0.3366002998865307, 'weight_decay': 0.046288001779492105, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.0983958363195665}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the T

Epoch 1/7


I0000 00:00:1718820050.660587     835 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bde4d42d4876bd28:0:0), session_name()
I0000 00:00:1718820095.003874     835 tpu_compile_op_common.cc:245] Compilation of bde4d42d4876bd28:0:0 with session name  took 44.343236693s and succeeded
I0000 00:00:1718820095.225546     835 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bde4d42d4876bd28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2161905_14024361966137086006", property.function_library_fingerprint = 9879456481814727500, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718820164.161862     834 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bb5ab317f5ee703f:0:0), session_name()
I0000 00:00:1718820170.477873     834 tpu_compile_op_common.cc:245] Compilation of bb5ab317f5ee703f:0:0 with session name  took 6.315956018s and succeeded
I0000 00:00:1718820170.534353     834 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bb5ab317f5ee703f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2198413_17529277351611537883", property.function_library_fingerprint = 12850763406538892801, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718820176.601874     793 tpu_compile_op_common.cc:245] Compilation of f7dcb410fdbf9a13:0:0 with session name  took 5.966786341s and succeeded
I0000 00:00:1718820176.652929     793 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f7dcb410fdbf9a13:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2198413_17529277351611537883", property.function_library_fingerprint = 12850763406538892801, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718820176.653159     793 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7744227647781372 and accuracy: 0.8345588445663452


[I 2024-06-19 18:03:21,530] Trial 51 finished with value: 0.8345588445663452 and parameters: {'num_epochs': 7, 'dropout_rate': 0.13154745920152278, 'weight_decay': 0.0719698168065505, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.3311830974495595}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model T

Epoch 1/4


I0000 00:00:1718820458.348607     807 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fa16455c75fa2c9c:0:0), session_name()
I0000 00:00:1718820501.578484     807 tpu_compile_op_common.cc:245] Compilation of fa16455c75fa2c9c:0:0 with session name  took 43.229828144s and succeeded
I0000 00:00:1718820501.804461     807 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fa16455c75fa2c9c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2347855_14912541389794270397", property.function_library_fingerprint = 7929154797133745589, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718820566.547677     831 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(27c42718cd8a1273:0:0), session_name()
I0000 00:00:1718820572.354818     831 tpu_compile_op_common.cc:245] Compilation of 27c42718cd8a1273:0:0 with session name  took 5.807077281s and succeeded
I0000 00:00:1718820572.412967     831 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(27c42718cd8a1273:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2384379_14684266837404076804", property.function_library_fingerprint = 14906286345123882706, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718820578.410786     792 tpu_compile_op_common.cc:245] Compilation of e6acf55eefe23c9a:0:0 with session name  took 5.894303919s and succeeded
I0000 00:00:1718820578.461599     792 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6acf55eefe23c9a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2384379_14684266837404076804", property.function_library_fingerprint = 14906286345123882706, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718820578.461923     792 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/4
Epoch 3/4
Epoch 4/4
f1 score: 0.5930257439613342 and accuracy: 0.8356092572212219


[I 2024-06-19 18:09:52,913] Trial 57 finished with value: 0.8356092572212219 and parameters: {'num_epochs': 4, 'dropout_rate': 0.3117538524378382, 'weight_decay': 0.09401648542859303, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.13575881417370303}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TF

Epoch 1/9


I0000 00:00:1718820848.620538     839 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c7d05c8e2f489c6:0:0), session_name()
I0000 00:00:1718820891.057698     839 tpu_compile_op_common.cc:245] Compilation of c7d05c8e2f489c6:0:0 with session name  took 42.437106172s and succeeded
I0000 00:00:1718820891.244742     839 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c7d05c8e2f489c6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2528869_14985063790719548974", property.function_library_fingerprint = 12388768619490655378, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718820958.420178     797 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5e0ccdf6d1f8b1cf:0:0), session_name()
I0000 00:00:1718820964.091565     797 tpu_compile_op_common.cc:245] Compilation of 5e0ccdf6d1f8b1cf:0:0 with session name  took 5.67134228s and succeeded
I0000 00:00:1718820964.140940     797 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e0ccdf6d1f8b1cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2565397_16807053290392072318", property.function_library_fingerprint = 12342377581536496476, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718820969.952833     829 tpu_compile_op_common.cc:245] Compilation of cdc6cb3f50ea1584:0:0 with session name  took 5.678052975s and succeeded
I0000 00:00:1718820969.989953     829 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cdc6cb3f50ea1584:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2565397_16807053290392072318", property.function_library_fingerprint = 12342377581536496476, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718820969.990175     829 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.6203676462173462 and accuracy: 0.806197464466095


[I 2024-06-19 18:16:39,710] Trial 62 finished with value: 0.806197464466095 and parameters: {'num_epochs': 9, 'dropout_rate': 0.3913271376688334, 'weight_decay': 0.04681434703265605, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.4868935136952991}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRo

Epoch 1/14


I0000 00:00:1718821257.615766     801 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ef65a2cd6437c9a9:0:0), session_name()
I0000 00:00:1718821301.511340     801 tpu_compile_op_common.cc:245] Compilation of ef65a2cd6437c9a9:0:0 with session name  took 43.895523499s and succeeded
I0000 00:00:1718821301.716247     801 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef65a2cd6437c9a9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2718151_5269168842870736520", property.function_library_fingerprint = 17483564445136179385, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718821344.350454     830 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f30d6f73a69017ef:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2718151_5269168842870736520", property.function_library_fingerprint = 17483564445136179385, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718821344.352556     830 tpu_compilation_cache_interface.cc:541] After adding entry for key f30d6f73a69017ef:0:0 with session_name  cache is 81 entries (16804371778 bytes),  marked for eviction 40 entries (8242952330 bytes).
I0000 00:00:1718821



I0000 00:00:1718821380.753532     825 tpu_compile_op_common.cc:245] Compilation of ef9e26cb031f25f8:0:0 with session name  took 5.994623972s and succeeded
I0000 00:00:1718821380.806944     825 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef9e26cb031f25f8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2754675_6230450457755246020", property.function_library_fingerprint = 12950151331676954604, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718821380.807237     825 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14
f1 score: 0.5726456642150879 and accuracy: 0.7899159789085388


[I 2024-06-19 18:23:50,914] Trial 68 finished with value: 0.7899159789085388 and parameters: {'num_epochs': 14, 'dropout_rate': 0.42729144769493493, 'weight_decay': 0.08577103659909895, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.009320798003850467}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/12


I0000 00:00:1718821701.534178     804 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(465c724abe23c674:0:0), session_name()
I0000 00:00:1718821747.777721     804 tpu_compile_op_common.cc:245] Compilation of 465c724abe23c674:0:0 with session name  took 46.243444549s and succeeded
I0000 00:00:1718821748.006397     804 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(465c724abe23c674:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2915709_6363769752931687894", property.function_library_fingerprint = 10466608967520703649, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718821790.626700     806 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(828b4330c9a291a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2915709_6363769752931687894", property.function_library_fingerprint = 10466608967520703649, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718821790.627451     806 tpu_compilation_cache_interface.cc:541] After adding entry for key 828b4330c9a291a:0:0 with session_name  cache is 85 entries (17579835628 bytes),  marked for eviction 42 entries (9131368386 bytes).
I0000 00:00:171882181



I0000 00:00:1718821831.854452     854 tpu_compile_op_common.cc:245] Compilation of f513bf98bf2563fd:0:0 with session name  took 6.265955101s and succeeded
I0000 00:00:1718821831.905017     854 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f513bf98bf2563fd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_2952233_13617177809467863888", property.function_library_fingerprint = 12339805386189608250, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718821831.905250     854 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7632471323013306 and accuracy: 0.8382353186607361


[I 2024-06-19 18:31:15,262] Trial 72 finished with value: 0.8382353186607361 and parameters: {'num_epochs': 12, 'dropout_rate': 0.26181462718202714, 'weight_decay': 0.040985659057195645, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.28838147171384737}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/16


I0000 00:00:1718822145.055333     836 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(49d4ba04bb318447:0:0), session_name()
I0000 00:00:1718822193.453775     836 tpu_compile_op_common.cc:245] Compilation of 49d4ba04bb318447:0:0 with session name  took 48.398374622s and succeeded
I0000 00:00:1718822193.691110     836 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(49d4ba04bb318447:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3109955_17742220616165420292", property.function_library_fingerprint = 6553979364681388226, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718822241.091848     825 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(50656cdbe67954da:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3109955_17742220616165420292", property.function_library_fingerprint = 6553979364681388226, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718822241.093290     825 tpu_compilation_cache_interface.cc:541] After adding entry for key 50656cdbe67954da:0:0 with session_name  cache is 89 entries (18355299206 bytes),  marked for eviction 47 entries (9968471271 bytes).
I0000 00:00:1718822



I0000 00:00:1718822279.971926     831 tpu_compile_op_common.cc:245] Compilation of c43e97cf3dde23ad:0:0 with session name  took 6.191503039s and succeeded
I0000 00:00:1718822280.021899     831 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c43e97cf3dde23ad:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3146479_2350079273123477915", property.function_library_fingerprint = 11229790217880314714, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718822280.022197     831 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
f1 score: 0.7278674244880676 and accuracy: 0.8335084319114685


[I 2024-06-19 18:38:58,515] Trial 79 finished with value: 0.8335084319114685 and parameters: {'num_epochs': 16, 'dropout_rate': 0.36089195808663127, 'weight_decay': 0.09521758999335866, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.20522846939656658}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model 

Epoch 1/20


I0000 00:00:1718822608.148247     825 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(1a84394e2034e9cd:0:0), session_name()
I0000 00:00:1718822653.973842     825 tpu_compile_op_common.cc:245] Compilation of 1a84394e2034e9cd:0:0 with session name  took 45.825517302s and succeeded
I0000 00:00:1718822654.188307     825 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1a84394e2034e9cd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3310983_6479215720084757990", property.function_library_fingerprint = 2466373181722411379, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718822699.904511     837 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(253154e0b5168652:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3310983_6479215720084757990", property.function_library_fingerprint = 2466373181722411379, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718822699.905836     837 tpu_compilation_cache_interface.cc:541] After adding entry for key 253154e0b5168652:0:0 with session_name  cache is 93 entries (19130914908 bytes),  marked for eviction 50 entries (10573389322 bytes).
I0000 00:00:1718822



I0000 00:00:1718822738.701210     871 tpu_compile_op_common.cc:245] Compilation of e1f60e153789beb8:0:0 with session name  took 6.052588987s and succeeded
I0000 00:00:1718822738.740949     871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e1f60e153789beb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3347553_2513424418239478407", property.function_library_fingerprint = 11688085140747486888, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718822738.741147     871 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.7604992985725403 and accuracy: 0.838760495185852


[I 2024-06-19 18:46:49,286] Trial 86 finished with value: 0.838760495185852 and parameters: {'num_epochs': 20, 'dropout_rate': 0.32141387992711673, 'weight_decay': 0.034837991887254983, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.04112425359674174}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the

Epoch 1/11


I0000 00:00:1718823078.180499     820 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2f8c59f063685404:0:0), session_name()
I0000 00:00:1718823124.327859     820 tpu_compile_op_common.cc:245] Compilation of 2f8c59f063685404:0:0 with session name  took 46.147310084s and succeeded
I0000 00:00:1718823124.559418     820 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2f8c59f063685404:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3518539_1366282295998395360", property.function_library_fingerprint = 2939264455650155129, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718823168.437355     874 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48c5279be171d499:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3518539_1366282295998395360", property.function_library_fingerprint = 2939264455650155129, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718823168.438344     874 tpu_compilation_cache_interface.cc:541] After adding entry for key 48c5279be171d499:0:0 with session_name  cache is 97 entries (19906392992 bytes),  marked for eviction 55 entries (11542928077 bytes).
I0000 00:00:1718823



I0000 00:00:1718823208.163440     852 tpu_compile_op_common.cc:245] Compilation of 5b32873035047595:0:0 with session name  took 7.052959585s and succeeded
I0000 00:00:1718823208.210731     852 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5b32873035047595:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3555067_14125937519833074121", property.function_library_fingerprint = 14096750047699972288, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718823208.210915     852 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7489544153213501 and accuracy: 0.8350840210914612


[I 2024-06-19 18:54:07,863] Trial 91 finished with value: 0.8350840210914612 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2755019250863474, 'weight_decay': 0.06689179062361725, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.24451574706819335}. Best is trial 7 with value: 0.8518907427787781.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model T

Epoch 1/15


I0000 00:00:1718823520.662957     798 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ef745e1b627b0fcb:0:0), session_name()
I0000 00:00:1718823567.910826     798 tpu_compile_op_common.cc:245] Compilation of ef745e1b627b0fcb:0:0 with session name  took 47.24781908s and succeeded
I0000 00:00:1718823568.129591     798 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ef745e1b627b0fcb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3711291_17064346986461138573", property.function_library_fingerprint = 13007362617960724752, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718823637.041414     810 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8a11e78d08fe865a:0:0), session_name()
I0000 00:00:1718823643.227293     810 tpu_compile_op_common.cc:245] Compilation of 8a11e78d08fe865a:0:0 with session name  took 6.185820582s and succeeded
I0000 00:00:1718823643.285318     810 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8a11e78d08fe865a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3747861_18187571663103088035", property.function_library_fingerprint = 17723060241194084045, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718823649.682249     844 tpu_compile_op_common.cc:245] Compilation of 81f2f7a40131ea17:0:0 with session name  took 6.269833776s and succeeded
I0000 00:00:1718823649.725899     844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(81f2f7a40131ea17:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3747861_18187571663103088035", property.function_library_fingerprint = 17723060241194084045, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718823649.726113     844 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
f1 score: 0.7577962875366211 and accuracy: 0.8371848464012146


[I 2024-06-19 19:01:43,125] Trial 97 finished with value: 0.8371848464012146 and parameters: {'num_epochs': 15, 'dropout_rate': 0.2961025650474741, 'weight_decay': 0.05776246820662401, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.30702015600558175}. Best is trial 88 with value: 0.8524159789085388.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the

Epoch 1/19


I0000 00:00:1718823977.723317     797 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(402cf882aaea26e3:0:0), session_name()
I0000 00:00:1718824027.319873     797 tpu_compile_op_common.cc:245] Compilation of 402cf882aaea26e3:0:0 with session name  took 49.596498647s and succeeded
I0000 00:00:1718824027.559260     797 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(402cf882aaea26e3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3910495_17660196420602050046", property.function_library_fingerprint = 13926080298671820880, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718824071.253642     867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(29f6ac3a05c4d73a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3910495_17660196420602050046", property.function_library_fingerprint = 13926080298671820880, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718824071.254576     867 tpu_compilation_cache_interface.cc:541] After adding entry for key 29f6ac3a05c4d73a:0:0 with session_name  cache is 105 entries (21457209010 bytes),  marked for eviction 61 entries (12928679408 bytes).
I0000 00:00:1718



I0000 00:00:1718824111.081457     867 tpu_compile_op_common.cc:245] Compilation of 97a6ffb388fb84c4:0:0 with session name  took 6.688958661s and succeeded
I0000 00:00:1718824111.134120     867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(97a6ffb388fb84c4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_3947003_12865754911616194004", property.function_library_fingerprint = 16374308662544210400, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718824111.134541     867 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
f1 score: 0.741941511631012 and accuracy: 0.838760495185852


[I 2024-06-19 19:09:38,763] Trial 104 finished with value: 0.838760495185852 and parameters: {'num_epochs': 19, 'dropout_rate': 0.3478804437133182, 'weight_decay': 0.08256893146104731, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.039184356789212366}. Best is trial 88 with value: 0.8524159789085388.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/9


I0000 00:00:1718824449.831930     821 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(52be02eb1f17fb28:0:0), session_name()
I0000 00:00:1718824497.981126     821 tpu_compile_op_common.cc:245] Compilation of 52be02eb1f17fb28:0:0 with session name  took 48.149124133s and succeeded
I0000 00:00:1718824498.187824     821 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(52be02eb1f17fb28:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4116333_17797677244322102445", property.function_library_fingerprint = 15233471606429458063, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718824541.213999     839 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4499139b9205dab0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4116333_17797677244322102445", property.function_library_fingerprint = 15233471606429458063, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718824541.215336     839 tpu_compilation_cache_interface.cc:541] After adding entry for key 4499139b9205dab0:0:0 with session_name  cache is 109 entries (22232687782 bytes),  marked for eviction 65 entries (13702859070 bytes).
I0000 00:00:1718



I0000 00:00:1718824581.431571     792 tpu_compile_op_common.cc:245] Compilation of c461ecc61aab0d65:0:0 with session name  took 7.260580433s and succeeded
I0000 00:00:1718824581.476092     792 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c461ecc61aab0d65:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4152861_15454602615758086672", property.function_library_fingerprint = 9100201276014937965, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718824581.476377     792 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7551916241645813 and accuracy: 0.8534663915634155


2024-06-19 19:17:13.332276: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718824635.113634     873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(c3908732f5c87bf6:0:0), session_name()
I0000 00:00:1718824640.868318     873 tpu_compile_op_common.cc:245] Compilation of c3908732f5c87bf6:0:0 with session name  took 5.754596156s and succeeded
I0000 00:00:1718824640.909986     873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3908732f5c87bf6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_6446247013771732028", property.function_library_fingerprint = 13046982609024262057, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718824654.610722     788 tpu_compile_op_common.cc:245] Compilation of 51d44f0f676e677:0:0 with session name  took 6.710079589s and succeeded
I0000 00:00:1718824654.662337     788 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(51d44f0f676e677:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4177353_3417262348530778289", property.function_library_fingerprint = 13046982609024262057, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718824654.662587     788 tpu_compilation_cache_interface.cc:541] After adding entry for key 51d

File ./saved_models/disaster_RoBerta_1_model_trial_24_accuracy_0.8393_avg_score_0.8137_f1_0.7881_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/3


I0000 00:00:1718824753.670996     851 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(48b904c0e2acd43d:0:0), session_name()
I0000 00:00:1718824842.707048     851 tpu_compile_op_common.cc:245] Compilation of 48b904c0e2acd43d:0:0 with session name  took 1m29.0359982s and succeeded
I0000 00:00:1718824842.996054     851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(48b904c0e2acd43d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4212200_17879808304743999706", property.function_library_fingerprint = 12097184016909970514, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0,

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_RoBerta_1_model_trial_38_accuracy_0.8461_avg_score_0.7951_f1_0.7441_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 19:22:12,727] Trial 111 finished with value: 0.8534663915634155 and parameters: {'num_epochs': 9, 'dropout_rate': 0.2411476134778031, 'weight_decay': 0.07840948569757004, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.36522927500239794}. Best is trial 111 with value: 0.8534663915634155.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/8


I0000 00:00:1718825205.552073     859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(17e4669afc5280a1:0:0), session_name()
I0000 00:00:1718825251.245566     859 tpu_compile_op_common.cc:245] Compilation of 17e4669afc5280a1:0:0 with session name  took 45.693445811s and succeeded
I0000 00:00:1718825251.475124     859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(17e4669afc5280a1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4378728_995473581698028121", property.function_library_fingerprint = 15280078735421965927, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718825325.219514     873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(56709066c0e92973:0:0), session_name()
I0000 00:00:1718825332.180731     873 tpu_compile_op_common.cc:245] Compilation of 56709066c0e92973:0:0 with session name  took 6.961131806s and succeeded
I0000 00:00:1718825332.268936     873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(56709066c0e92973:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4415256_465347986527520253", property.function_library_fingerprint = 5110956395691767869, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, emb



I0000 00:00:1718825339.270365     829 tpu_compile_op_common.cc:245] Compilation of a0c6122bd0057653:0:0 with session name  took 6.883501092s and succeeded
I0000 00:00:1718825339.320714     829 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a0c6122bd0057653:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4415256_465347986527520253", property.function_library_fingerprint = 5110956395691767869, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718825339.320966     829 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7353450059890747 and accuracy: 0.8340336084365845


[I 2024-06-19 19:29:29,481] Trial 118 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 8, 'dropout_rate': 0.24861677929810838, 'weight_decay': 0.0893066139793009, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.40063311589711176}. Best is trial 111 with value: 0.8534663915634155.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/5


I0000 00:00:1718825640.138122     821 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e5e2c36bfbaef9:0:0), session_name()
I0000 00:00:1718825686.936165     821 tpu_compile_op_common.cc:245] Compilation of e5e2c36bfbaef9:0:0 with session name  took 46.797965725s and succeeded
I0000 00:00:1718825687.145158     821 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e5e2c36bfbaef9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4566370_8850523831497497838", property.function_library_fingerprint = 2506254192047581146, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, embeddi



I0000 00:00:1718825756.571935     822 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(cab7fda5439ce983:0:0), session_name()
I0000 00:00:1718825762.881480     822 tpu_compile_op_common.cc:245] Compilation of cab7fda5439ce983:0:0 with session name  took 6.309492725s and succeeded
I0000 00:00:1718825762.932347     822 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cab7fda5439ce983:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4602898_831139927119255398", property.function_library_fingerprint = 11300071588796618742, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718825769.783014     780 tpu_compile_op_common.cc:245] Compilation of c3225c990a39013b:0:0 with session name  took 6.7050998s and succeeded
I0000 00:00:1718825769.830947     780 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c3225c990a39013b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4602898_831139927119255398", property.function_library_fingerprint = 11300071588796618742, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718825769.831243     780 tpu_compilation_cache_interface.cc:541] After adding entry for key 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
f1 score: 0.6756715774536133 and accuracy: 0.8340336084365845


[I 2024-06-19 19:36:28,121] Trial 123 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 5, 'dropout_rate': 0.25508252276565324, 'weight_decay': 0.044654432936987905, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.330527747720937}. Best is trial 111 with value: 0.8534663915634155.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/7


I0000 00:00:1718826057.321137     793 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(975b54de9151ea1b:0:0), session_name()
I0000 00:00:1718826106.617080     793 tpu_compile_op_common.cc:245] Compilation of 975b54de9151ea1b:0:0 with session name  took 49.295892114s and succeeded
I0000 00:00:1718826106.837516     793 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(975b54de9151ea1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4749044_2666999749037068191", property.function_library_fingerprint = 2682032851862140566, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718826153.782261     829 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5e5c8dd26d0bc71a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4749044_2666999749037068191", property.function_library_fingerprint = 2682032851862140566, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718826153.783509     829 tpu_compilation_cache_interface.cc:541] After adding entry for key 5e5c8dd26d0bc71a:0:0 with session_name  cache is 126 entries (25650946298 bytes),  marked for eviction 84 entries (17287238102 bytes).
I0000 00:00:171882



I0000 00:00:1718826193.623588     796 tpu_compile_op_common.cc:245] Compilation of 6b63caec2ca26866:0:0 with session name  took 6.671395963s and succeeded
I0000 00:00:1718826193.680410     796 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6b63caec2ca26866:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4785572_10920754761350657991", property.function_library_fingerprint = 628379075100954133, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718826193.680495     796 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7515805959701538 and accuracy: 0.8293067216873169


[I 2024-06-19 19:43:39,205] Trial 128 finished with value: 0.8293067216873169 and parameters: {'num_epochs': 7, 'dropout_rate': 0.19997870159931905, 'weight_decay': 0.007397804221689459, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.459769804954218}. Best is trial 111 with value: 0.8534663915634155.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/6


I0000 00:00:1718826488.469932     792 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d8069a719f3bf49d:0:0), session_name()
I0000 00:00:1718826535.916236     792 tpu_compile_op_common.cc:245] Compilation of d8069a719f3bf49d:0:0 with session name  took 47.446233086s and succeeded
I0000 00:00:1718826536.129539     792 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d8069a719f3bf49d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4935030_109173760019821592", property.function_library_fingerprint = 7724730479149290943, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718826610.670469     810 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dca23aa7428c2491:0:0), session_name()
I0000 00:00:1718826617.551641     810 tpu_compile_op_common.cc:245] Compilation of dca23aa7428c2491:0:0 with session name  took 6.881118696s and succeeded
I0000 00:00:1718826617.615048     810 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dca23aa7428c2491:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4971558_7566695892706511190", property.function_library_fingerprint = 1858191677405799172, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718826624.410345     868 tpu_compile_op_common.cc:245] Compilation of d544ab4ca98aaf58:0:0 with session name  took 6.678321171s and succeeded
I0000 00:00:1718826624.464309     868 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d544ab4ca98aaf58:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_4971558_7566695892706511190", property.function_library_fingerprint = 1858191677405799172, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718826624.464592     868 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.731292724609375 and accuracy: 0.8403361439704895


[I 2024-06-19 19:50:46,295] Trial 133 finished with value: 0.8403361439704895 and parameters: {'num_epochs': 6, 'dropout_rate': 0.21952932378193568, 'weight_decay': 0.0027612120766162527, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.36403677695526}. Best is trial 111 with value: 0.8534663915634155.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/10


I0000 00:00:1718826916.303954     801 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39c08487b261276c:0:0), session_name()
I0000 00:00:1718826962.629063     801 tpu_compile_op_common.cc:245] Compilation of 39c08487b261276c:0:0 with session name  took 46.325041212s and succeeded
I0000 00:00:1718826962.829993     801 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39c08487b261276c:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5119360_17405432603608317736", property.function_library_fingerprint = 7669565070516485689, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718827035.281474     783 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e86cbb688add2f8d:0:0), session_name()
I0000 00:00:1718827042.484170     783 tpu_compile_op_common.cc:245] Compilation of e86cbb688add2f8d:0:0 with session name  took 7.202633377s and succeeded
I0000 00:00:1718827042.537371     783 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e86cbb688add2f8d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5155888_9269688958177757347", property.function_library_fingerprint = 14394091863311848565, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718827049.876573     869 tpu_compile_op_common.cc:245] Compilation of adbd7c7a56eddb8:0:0 with session name  took 7.216011844s and succeeded
I0000 00:00:1718827049.932461     869 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(adbd7c7a56eddb8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5155888_9269688958177757347", property.function_library_fingerprint = 14394091863311848565, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718827049.932748     869 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7673306465148926 and accuracy: 0.8539915680885315


2024-06-19 19:58:26.491917: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718827108.417561     871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(96b672b0fad82754:0:0), session_name()
I0000 00:00:1718827113.518189     871 tpu_compile_op_common.cc:245] Compilation of 96b672b0fad82754:0:0 with session name  took 5.100581455s and succeeded
I0000 00:00:1718827113.554548     871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(96b672b0fad82754:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_9849958829140994964", property.function_library_fingerprint = 17583173102850653453, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topo



I0000 00:00:1718827126.862359     781 tpu_compile_op_common.cc:245] Compilation of cc1e4c2d513237ae:0:0 with session name  took 6.879727008s and succeeded
I0000 00:00:1718827126.899143     781 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(cc1e4c2d513237ae:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5182036_17983218864857793207", property.function_library_fingerprint = 17583173102850653453, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718827126.899350     781 tpu_compilation_cache_interface.cc:541] After adding entry for key 

File ./saved_models/disaster_RoBerta_1_model_trial_38_accuracy_0.8461_avg_score_0.7951_f1_0.7441_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/3


I0000 00:00:1718827229.599891     871 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(fd6b4d3d1186bf83:0:0), session_name()
I0000 00:00:1718827307.901928     871 tpu_compile_op_common.cc:245] Compilation of fd6b4d3d1186bf83:0:0 with session name  took 1m18.301973712s and succeeded
I0000 00:00:1718827308.201419     871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fd6b4d3d1186bf83:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5216883_7037050790434813146", property.function_library_fingerprint = 15346583485933877587, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0

Epoch 2/3
Epoch 3/3
File ./saved_models/disaster_RoBerta_1_model_trial_12_accuracy_0.8493_avg_score_0.7968_f1_0.7444_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 20:03:15,126] Trial 138 finished with value: 0.8539915680885315 and parameters: {'num_epochs': 10, 'dropout_rate': 0.19582027072865782, 'weight_decay': 0.05209151418268603, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.29202824359582535}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/9


I0000 00:00:1718827668.500258     798 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(60e9d5f00596320d:0:0), session_name()
I0000 00:00:1718827717.705214     798 tpu_compile_op_common.cc:245] Compilation of 60e9d5f00596320d:0:0 with session name  took 49.204880919s and succeeded
I0000 00:00:1718827717.927562     798 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(60e9d5f00596320d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5383553_10746519773900148938", property.function_library_fingerprint = 13817753661105636233, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718827788.896022     851 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(aa080fd8a90ec9b4:0:0), session_name()
I0000 00:00:1718827795.569424     851 tpu_compile_op_common.cc:245] Compilation of aa080fd8a90ec9b4:0:0 with session name  took 6.673342461s and succeeded
I0000 00:00:1718827795.626499     851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aa080fd8a90ec9b4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5420123_2231664485413816147", property.function_library_fingerprint = 14227653460276520716, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718827802.443279     854 tpu_compile_op_common.cc:245] Compilation of da8209800e8df4a:0:0 with session name  took 6.700660031s and succeeded
I0000 00:00:1718827802.497851     854 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(da8209800e8df4a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5420123_2231664485413816147", property.function_library_fingerprint = 14227653460276520716, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718827802.498055     854 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7783921957015991 and accuracy: 0.8445377945899963


[I 2024-06-19 20:10:32,778] Trial 147 finished with value: 0.8445377945899963 and parameters: {'num_epochs': 9, 'dropout_rate': 0.19116199891887892, 'weight_decay': 0.05660801662546617, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.23381713457488112}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of t

Epoch 1/10


I0000 00:00:1718828098.655134     802 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f637f30fbeea109e:0:0), session_name()
I0000 00:00:1718828143.478976     802 tpu_compile_op_common.cc:245] Compilation of f637f30fbeea109e:0:0 with session name  took 44.82377042s and succeeded
I0000 00:00:1718828143.670019     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f637f30fbeea109e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5572877_826132518811493003", property.function_library_fingerprint = 1135030739708993268, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, emb



I0000 00:00:1718828188.361112     791 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(1f6adca6a19fd612:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5572877_826132518811493003", property.function_library_fingerprint = 1135030739708993268, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718828188.362031     791 tpu_compilation_cache_interface.cc:541] After adding entry for key 1f6adca6a19fd612:0:0 with session_name  cache is 147 entries (29844806146 bytes),  marked for eviction 105 entries (21457209010 bytes).
I0000 00:00:171882



I0000 00:00:1718828228.336133     838 tpu_compile_op_common.cc:245] Compilation of 79e0d197c2f10d2b:0:0 with session name  took 6.689840624s and succeeded
I0000 00:00:1718828228.384558     838 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(79e0d197c2f10d2b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5609401_11327111415308565693", property.function_library_fingerprint = 16713510027930800599, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718828228.384825     838 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.8089723587036133 and accuracy: 0.838760495185852


[I 2024-06-19 20:17:43,983] Trial 152 finished with value: 0.838760495185852 and parameters: {'num_epochs': 10, 'dropout_rate': 0.10325845891866783, 'weight_decay': 0.052269893984016276, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.32807348202341946}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/3


I0000 00:00:1718828537.500736     826 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(54db9b49fd386ca0:0:0), session_name()
I0000 00:00:1718828586.243809     826 tpu_compile_op_common.cc:245] Compilation of 54db9b49fd386ca0:0:0 with session name  took 48.743021147s and succeeded
I0000 00:00:1718828586.460096     826 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(54db9b49fd386ca0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5763755_5982219832032013490", property.function_library_fingerprint = 14043851395594916506, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718828633.901994     854 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bc006b9d675bd8a2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5763755_5982219832032013490", property.function_library_fingerprint = 14043851395594916506, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718828633.903152     854 tpu_compilation_cache_interface.cc:541] After adding entry for key bc006b9d675bd8a2:0:0 with session_name  cache is 151 entries (30620009854 bytes),  marked for eviction 109 entries (22232687782 bytes).
I0000 00:00:1718



I0000 00:00:1718828674.111565     785 tpu_compile_op_common.cc:245] Compilation of e9867d8d0b98ff58:0:0 with session name  took 6.935214775s and succeeded
I0000 00:00:1718828674.162592     785 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e9867d8d0b98ff58:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5800263_8560411464812583279", property.function_library_fingerprint = 6892292606156411378, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718828674.162833     785 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/3
Epoch 3/3
f1 score: 0.5742538571357727 and accuracy: 0.8230041861534119


[I 2024-06-19 20:24:43,594] Trial 159 finished with value: 0.8230041861534119 and parameters: {'num_epochs': 3, 'dropout_rate': 0.31034155455581985, 'weight_decay': 0.05390570199137822, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.2562255365994962}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/8


I0000 00:00:1718828958.411780     872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(817568986289a393:0:0), session_name()
I0000 00:00:1718829007.878657     872 tpu_compile_op_common.cc:245] Compilation of 817568986289a393:0:0 with session name  took 49.466800758s and succeeded
I0000 00:00:1718829008.092648     872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(817568986289a393:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5943025_17701439837035084941", property.function_library_fingerprint = 8572211432811216803, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718829081.058461     850 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8d7405d2ea09cf00:0:0), session_name()
I0000 00:00:1718829088.277750     850 tpu_compile_op_common.cc:245] Compilation of 8d7405d2ea09cf00:0:0 with session name  took 7.219221786s and succeeded
I0000 00:00:1718829088.332287     850 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8d7405d2ea09cf00:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5979533_368811156731014471", property.function_library_fingerprint = 4543599247663573233, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, emb



I0000 00:00:1718829095.858621     781 tpu_compile_op_common.cc:245] Compilation of 4a25542843b0bc57:0:0 with session name  took 7.417885726s and succeeded
I0000 00:00:1718829095.910241     781 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4a25542843b0bc57:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_5979533_368811156731014471", property.function_library_fingerprint = 4543599247663573233, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718829095.910461     781 tpu_compilation_cache_interface.cc:541] After adding entry for key

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7493466734886169 and accuracy: 0.8167017102241516


[I 2024-06-19 20:32:06,245] Trial 165 finished with value: 0.8167017102241516 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2132741341654663, 'weight_decay': 0.060550684355051364, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.9147440956012015}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/19


I0000 00:00:1718829398.154837     839 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(d7777285f785ab66:0:0), session_name()
I0000 00:00:1718829448.556353     839 tpu_compile_op_common.cc:245] Compilation of d7777285f785ab66:0:0 with session name  took 50.401427382s and succeeded
I0000 00:00:1718829448.791184     839 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d7777285f785ab66:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6130647_6916647554440786907", property.function_library_fingerprint = 9376787861280118910, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718829518.852239     819 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(bce75577e0e2e422:0:0), session_name()
I0000 00:00:1718829525.064684     819 tpu_compile_op_common.cc:245] Compilation of bce75577e0e2e422:0:0 with session name  took 6.212401673s and succeeded
I0000 00:00:1718829525.105585     819 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(bce75577e0e2e422:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6167175_17369365719509097091", property.function_library_fingerprint = 13336777727093709298, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718829532.220037     808 tpu_compile_op_common.cc:245] Compilation of 2770e5f52e88fc4a:0:0 with session name  took 7.017210334s and succeeded
I0000 00:00:1718829532.266829     808 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2770e5f52e88fc4a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6167175_17369365719509097091", property.function_library_fingerprint = 13336777727093709298, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718829532.267034     808 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
f1 score: 0.7779588103294373 and accuracy: 0.8371848464012146


[I 2024-06-19 20:39:59,921] Trial 172 finished with value: 0.8371848464012146 and parameters: {'num_epochs': 19, 'dropout_rate': 0.28042510697560996, 'weight_decay': 0.049271541161832626, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3943916275364224}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/18


I0000 00:00:1718829867.660617     801 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(39846804c6b909e9:0:0), session_name()
I0000 00:00:1718829918.707113     801 tpu_compile_op_common.cc:245] Compilation of 39846804c6b909e9:0:0 with session name  took 51.046410421s and succeeded
I0000 00:00:1718829918.951030     801 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(39846804c6b909e9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6336505_74016998291751107", property.function_library_fingerprint = 1379140227145267449, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, emb



I0000 00:00:1718829962.579560     862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2cfc1d0c8ecdfb9e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6336505_74016998291751107", property.function_library_fingerprint = 1379140227145267449, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718829962.580569     862 tpu_compilation_cache_interface.cc:541] After adding entry for key 2cfc1d0c8ecdfb9e:0:0 with session_name  cache is 163 entries (32946175498 bytes),  marked for eviction 121 entries (24582860979 bytes).
I0000 00:00:1718829



I0000 00:00:1718830001.158616     826 tpu_compile_op_common.cc:245] Compilation of 3e334300a1f77d54:0:0 with session name  took 6.710685738s and succeeded
I0000 00:00:1718830001.215157     826 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3e334300a1f77d54:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6373033_18048197206271997587", property.function_library_fingerprint = 12154341767724922818, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718830001.215490     826 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
f1 score: 0.8390339016914368 and accuracy: 0.818802535533905


[I 2024-06-19 20:47:46,992] Trial 178 finished with value: 0.818802535533905 and parameters: {'num_epochs': 18, 'dropout_rate': 0.12630646651518979, 'weight_decay': 0.048126351299585556, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.21834590522733993}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/7


I0000 00:00:1718830336.572358     869 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(604429bfd44d4047:0:0), session_name()
I0000 00:00:1718830384.430781     869 tpu_compile_op_common.cc:245] Compilation of 604429bfd44d4047:0:0 with session name  took 47.85836803s and succeeded
I0000 00:00:1718830384.668909     869 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(604429bfd44d4047:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6540635_14869129967402457524", property.function_library_fingerprint = 15612223553284769741, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718830430.269363     873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a7bca422446b5633:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6540635_14869129967402457524", property.function_library_fingerprint = 15612223553284769741, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718830430.270540     873 tpu_compilation_cache_interface.cc:541] After adding entry for key a7bca422446b5633:0:0 with session_name  cache is 167 entries (33721379294 bytes),  marked for eviction 125 entries (25358341495 bytes).
I0000 00:00:171



I0000 00:00:1718830469.842004     843 tpu_compile_op_common.cc:245] Compilation of 4870b21540f59efb:0:0 with session name  took 7.017581952s and succeeded
I0000 00:00:1718830469.893972     843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4870b21540f59efb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6577143_13740881248847739724", property.function_library_fingerprint = 4005202091513364870, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718830469.894310     843 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7631034851074219 and accuracy: 0.8135504126548767


[I 2024-06-19 20:54:53,383] Trial 184 finished with value: 0.8135504126548767 and parameters: {'num_epochs': 7, 'dropout_rate': 0.16259293000288527, 'weight_decay': 0.006898130055910307, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.3041705100174034}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mo

Epoch 1/9


I0000 00:00:1718830762.158997     873 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(444e2c24fd099db3:0:0), session_name()
I0000 00:00:1718830811.182327     873 tpu_compile_op_common.cc:245] Compilation of 444e2c24fd099db3:0:0 with session name  took 49.023261544s and succeeded
I0000 00:00:1718830811.405038     873 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(444e2c24fd099db3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6726743_11531756173526398825", property.function_library_fingerprint = 9131131439782528660, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718830857.344322     825 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(11158223c869ba54:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6726743_11531756173526398825", property.function_library_fingerprint = 9131131439782528660, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718830857.346207     825 tpu_compilation_cache_interface.cc:541] After adding entry for key 11158223c869ba54:0:0 with session_name  cache is 171 entries (34496997612 bytes),  marked for eviction 129 entries (26133819899 bytes).
I0000 00:00:1718



I0000 00:00:1718830899.271087     840 tpu_compile_op_common.cc:245] Compilation of 32feaf86066f4250:0:0 with session name  took 6.353083367s and succeeded
I0000 00:00:1718830899.313642     840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(32feaf86066f4250:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6763313_10613627400656771154", property.function_library_fingerprint = 8936690370237349902, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718830899.313842     840 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7561998963356018 and accuracy: 0.845588207244873


[I 2024-06-19 21:02:10,040] Trial 191 finished with value: 0.845588207244873 and parameters: {'num_epochs': 9, 'dropout_rate': 0.24452672835928002, 'weight_decay': 0.08064739431468794, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.4229282550088997}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the

Epoch 1/13


I0000 00:00:1718831205.323395     870 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f9ff41ea8732be38:0:0), session_name()
I0000 00:00:1718831255.082118     870 tpu_compile_op_common.cc:245] Compilation of f9ff41ea8732be38:0:0 with session name  took 49.758668426s and succeeded
I0000 00:00:1718831255.284094     870 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f9ff41ea8732be38:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6916083_10495478617204389940", property.function_library_fingerprint = 10058885147586335925, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718831327.764632     821 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f355ec2beabb2d47:0:0), session_name()
I0000 00:00:1718831334.374113     821 tpu_compile_op_common.cc:245] Compilation of f355ec2beabb2d47:0:0 with session name  took 6.609397344s and succeeded
I0000 00:00:1718831334.423435     821 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f355ec2beabb2d47:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6952611_1483015220317595376", property.function_library_fingerprint = 3362008958053175226, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718831340.802662     794 tpu_compile_op_common.cc:245] Compilation of c04e7b9d4273fd3:0:0 with session name  took 6.266657165s and succeeded
I0000 00:00:1718831340.850013     794 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c04e7b9d4273fd3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6952611_1483015220317595376", property.function_library_fingerprint = 3362008958053175226, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718831340.850260     794 tpu_compilation_cache_interface.cc:541] After adding entry for key 

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7689634561538696 and accuracy: 0.8497899174690247


2024-06-19 21:10:13.182760: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp.
I0000 00:00:1718831414.974905     781 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(eb1b57faf1d9e95:0:0), session_name()
I0000 00:00:1718831420.808718     781 tpu_compile_op_common.cc:245] Compilation of eb1b57faf1d9e95:0:0 with session name  took 5.833756592s and succeeded
I0000 00:00:1718831420.844793     781 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(eb1b57faf1d9e95:0:0), session_name(), subgraph_key(std::string(property.function_name) = "cluster_predict_function_11773258499782089273", property.function_library_fingerprint = 8368939639912079446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topolog



I0000 00:00:1718831434.521827     810 tpu_compile_op_common.cc:245] Compilation of 9e69ac28586469ee:0:0 with session name  took 6.646837621s and succeeded
I0000 00:00:1718831434.577965     810 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9e69ac28586469ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_6983727_10632827354064094582", property.function_library_fingerprint = 8368939639912079446, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "24,55,;24,55,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718831434.578395     810 tpu_compilation_cache_interface.cc:541] After adding entry for key 9

File ./saved_models/disaster_RoBerta_1_model_trial_12_accuracy_0.8493_avg_score_0.7968_f1_0.7444_pre_fine_tuning_submission.csv (pre_fine_tuning) has been removed.
Epoch 1/4


I0000 00:00:1718831533.325925     795 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b3efa7fa78b983da:0:0), session_name()
I0000 00:00:1718831619.570436     795 tpu_compile_op_common.cc:245] Compilation of b3efa7fa78b983da:0:0 with session name  took 1m26.244437438s and succeeded
I0000 00:00:1718831619.844161     795 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b3efa7fa78b983da:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7018574_16151212365382866286", property.function_library_fingerprint = 10162615201793565600, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 

Epoch 2/4
Epoch 3/4
Epoch 4/4
File ./saved_models/disaster_RoBerta_1_model_trial_196_accuracy_0.8498_avg_score_0.8094_f1_0.7690_post_fine_tuning_submission.csv (post_fine_tuning) has been removed.


[I 2024-06-19 21:15:10,173] Trial 196 finished with value: 0.8497899174690247 and parameters: {'num_epochs': 13, 'dropout_rate': 0.26056750328479167, 'weight_decay': 0.07624694208102817, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3692465599420055}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/13


I0000 00:00:1718831986.721576     815 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3d2297dbace7ad6a:0:0), session_name()
I0000 00:00:1718832037.321486     815 tpu_compile_op_common.cc:245] Compilation of 3d2297dbace7ad6a:0:0 with session name  took 50.599847912s and succeeded
I0000 00:00:1718832037.553920     815 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3d2297dbace7ad6a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7185826_16787330959427286314", property.function_library_fingerprint = 595135740286497031, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718832080.730826     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(551ea02a58dce51b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7185826_16787330959427286314", property.function_library_fingerprint = 595135740286497031, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718832080.732412     802 tpu_compilation_cache_interface.cc:541] After adding entry for key 551ea02a58dce51b:0:0 with session_name  cache is 184 entries (37139922238 bytes),  marked for eviction 142 entries (28776668308 bytes).
I0000 00:00:17188



I0000 00:00:1718832119.470639     827 tpu_compile_op_common.cc:245] Compilation of 12a604e12479515e:0:0 with session name  took 6.364914875s and succeeded
I0000 00:00:1718832119.520060     827 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(12a604e12479515e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7222396_3706148607099607543", property.function_library_fingerprint = 4512582155962561735, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718832119.520370     827 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7802747488021851 and accuracy: 0.8476890921592712


[I 2024-06-19 21:22:47,243] Trial 207 finished with value: 0.8476890921592712 and parameters: {'num_epochs': 13, 'dropout_rate': 0.2315084768417547, 'weight_decay': 0.012293414314633617, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.3823357445929333}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of t

Epoch 1/14


I0000 00:00:1718832438.560745     843 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(dbca5bad4d041e57:0:0), session_name()
I0000 00:00:1718832487.708853     843 tpu_compile_op_common.cc:245] Compilation of dbca5bad4d041e57:0:0 with session name  took 49.147983486s and succeeded
I0000 00:00:1718832487.919292     843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(dbca5bad4d041e57:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7381790_14329472889297247957", property.function_library_fingerprint = 4694567982884532398, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718832561.122997     860 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(27412f14e641b9ef:0:0), session_name()
I0000 00:00:1718832568.045669     860 tpu_compile_op_common.cc:245] Compilation of 27412f14e641b9ef:0:0 with session name  took 6.922571968s and succeeded
I0000 00:00:1718832568.102665     860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(27412f14e641b9ef:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7418318_13625338906491734814", property.function_library_fingerprint = 358578657458584405, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718832574.768393     811 tpu_compile_op_common.cc:245] Compilation of 5b1ab5a8f83f0637:0:0 with session name  took 6.554732805s and succeeded
I0000 00:00:1718832574.828062     811 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5b1ab5a8f83f0637:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7418318_13625338906491734814", property.function_library_fingerprint = 358578657458584405, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718832574.828513     811 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/14
Epoch 3/14
Epoch 4/14
Epoch 5/14
Epoch 6/14
Epoch 7/14
Epoch 8/14
Epoch 9/14
Epoch 10/14
Epoch 11/14
Epoch 12/14
Epoch 13/14
Epoch 14/14
f1 score: 0.7756128907203674 and accuracy: 0.838760495185852


[I 2024-06-19 21:30:24,914] Trial 212 finished with value: 0.838760495185852 and parameters: {'num_epochs': 14, 'dropout_rate': 0.25502682204961735, 'weight_decay': 0.07657673367071031, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8311290886291436}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/10


I0000 00:00:1718832895.910754     861 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(f921a335016cb3b5:0:0), session_name()
I0000 00:00:1718832943.105458     861 tpu_compile_op_common.cc:245] Compilation of f921a335016cb3b5:0:0 with session name  took 47.194658594s and succeeded
I0000 00:00:1718832943.299630     861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f921a335016cb3b5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7579510_17491581138230306902", property.function_library_fingerprint = 17036954426137660484, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718833012.442695     803 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(7a50ba8d305f5cc0:0:0), session_name()
I0000 00:00:1718833018.349451     803 tpu_compile_op_common.cc:245] Compilation of 7a50ba8d305f5cc0:0:0 with session name  took 5.906709827s and succeeded
I0000 00:00:1718833018.393161     803 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7a50ba8d305f5cc0:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7616080_18316006856557253263", property.function_library_fingerprint = 3146039479066996623, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718833025.188211     802 tpu_compile_op_common.cc:245] Compilation of 313b2add832e1410:0:0 with session name  took 6.669758924s and succeeded
I0000 00:00:1718833025.237014     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(313b2add832e1410:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7616080_18316006856557253263", property.function_library_fingerprint = 3146039479066996623, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718833025.237379     802 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7691619396209717 and accuracy: 0.8429622054100037


[I 2024-06-19 21:37:40,692] Trial 220 finished with value: 0.8429622054100037 and parameters: {'num_epochs': 10, 'dropout_rate': 0.20574429307609457, 'weight_decay': 0.08569503839183358, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.241904368427722}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of th

Epoch 1/9


I0000 00:00:1718833331.051635     789 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(83e94b0181115171:0:0), session_name()
I0000 00:00:1718833379.618351     789 tpu_compile_op_common.cc:245] Compilation of 83e94b0181115171:0:0 with session name  took 48.566662635s and succeeded
I0000 00:00:1718833379.840383     789 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(83e94b0181115171:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7770490_4071309688864667106", property.function_library_fingerprint = 5939960251396226897, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718833427.008366     811 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9ff167fde8bdf48:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7770490_4071309688864667106", property.function_library_fingerprint = 5939960251396226897, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718833427.010175     811 tpu_compilation_cache_interface.cc:541] After adding entry for key 9ff167fde8bdf48:0:0 with session_name  cache is 196 entries (39466488954 bytes),  marked for eviction 154 entries (31102735382 bytes).
I0000 00:00:1718833



I0000 00:00:1718833467.293355     799 tpu_compile_op_common.cc:245] Compilation of 95e7bba0bf29a6fa:0:0 with session name  took 6.4408212s and succeeded
I0000 00:00:1718833467.342722     799 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(95e7bba0bf29a6fa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7807014_9571604027904592023", property.function_library_fingerprint = 5632892025403140500, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718833467.342979     799 tpu_compilation_cache_interface.cc:541] After adding entry for key 

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7481058239936829 and accuracy: 0.8450630307197571


[I 2024-06-19 21:44:58,129] Trial 227 finished with value: 0.8450630307197571 and parameters: {'num_epochs': 9, 'dropout_rate': 0.23718954499734912, 'weight_decay': 0.015887203700754776, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.3629277949405524}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/12


I0000 00:00:1718833770.905720     867 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(8e67e56082c13dc1:0:0), session_name()
I0000 00:00:1718833820.165235     867 tpu_compile_op_common.cc:245] Compilation of 8e67e56082c13dc1:0:0 with session name  took 49.259469161s and succeeded
I0000 00:00:1718833820.434235     867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8e67e56082c13dc1:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7959712_16343330217565967785", property.function_library_fingerprint = 14378445478478362593, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718833889.404030     803 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4877fca5109027a4:0:0), session_name()
I0000 00:00:1718833895.564480     803 tpu_compile_op_common.cc:245] Compilation of 4877fca5109027a4:0:0 with session name  took 6.16039687s and succeeded
I0000 00:00:1718833895.618138     803 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4877fca5109027a4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7996220_10354654106857016510", property.function_library_fingerprint = 3778380642080414826, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718833902.231051     864 tpu_compile_op_common.cc:245] Compilation of a33c0d5a4cbf7c3a:0:0 with session name  took 6.468330733s and succeeded
I0000 00:00:1718833902.281247     864 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a33c0d5a4cbf7c3a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_7996220_10354654106857016510", property.function_library_fingerprint = 3778380642080414826, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718833902.281501     864 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7773191928863525 and accuracy: 0.8392857313156128


[I 2024-06-19 21:52:26,893] Trial 235 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 12, 'dropout_rate': 0.24694235733403683, 'weight_decay': 0.0452524614119517, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.39059958176536863}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mo

Epoch 1/11


I0000 00:00:1718834220.055701     859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(44840dc21bcb7b88:0:0), session_name()
I0000 00:00:1718834268.360617     859 tpu_compile_op_common.cc:245] Compilation of 44840dc21bcb7b88:0:0 with session name  took 48.304852635s and succeeded
I0000 00:00:1718834268.588948     859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(44840dc21bcb7b88:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8154100_935722655335172124", property.function_library_fingerprint = 18170026058535186075, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718834311.618002     834 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7fcdd41fe58cf52f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8154100_935722655335172124", property.function_library_fingerprint = 18170026058535186075, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718834311.619082     834 tpu_compilation_cache_interface.cc:541] After adding entry for key 7fcdd41fe58cf52f:0:0 with session_name  cache is 204 entries (41017310700 bytes),  marked for eviction 162 entries (32653569271 bytes).
I0000 00:00:17188



I0000 00:00:1718834351.415185     824 tpu_compile_op_common.cc:245] Compilation of 119f55602507b6ab:0:0 with session name  took 6.625012884s and succeeded
I0000 00:00:1718834351.460877     824 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(119f55602507b6ab:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8190670_14350239537709461318", property.function_library_fingerprint = 2636140154207908009, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718834351.461145     824 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7631662487983704 and accuracy: 0.838760495185852


[I 2024-06-19 21:59:51,275] Trial 243 finished with value: 0.838760495185852 and parameters: {'num_epochs': 11, 'dropout_rate': 0.2312888173376874, 'weight_decay': 0.0019201743813756361, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.4333361794985368}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of t

Epoch 1/13


I0000 00:00:1718834659.796136     788 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2782215cb93d6e9b:0:0), session_name()
I0000 00:00:1718834707.110428     788 tpu_compile_op_common.cc:245] Compilation of 2782215cb93d6e9b:0:0 with session name  took 47.314216432s and succeeded
I0000 00:00:1718834707.359484     788 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2782215cb93d6e9b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8346894_13711610142582746464", property.function_library_fingerprint = 4058617721030949781, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718834754.271689     835 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3104093687439e1d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8346894_13711610142582746464", property.function_library_fingerprint = 4058617721030949781, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718834754.273006     835 tpu_compilation_cache_interface.cc:541] After adding entry for key 3104093687439e1d:0:0 with session_name  cache is 208 entries (41792929018 bytes),  marked for eviction 166 entries (33428901346 bytes).
I0000 00:00:1718



I0000 00:00:1718834794.127181     845 tpu_compile_op_common.cc:245] Compilation of 605d7700b6631443:0:0 with session name  took 6.539121447s and succeeded
I0000 00:00:1718834794.179206     845 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(605d7700b6631443:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8383464_9479853458233839682", property.function_library_fingerprint = 4790181233569658819, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718834794.179427     845 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13
f1 score: 0.7839601039886475 and accuracy: 0.8429622054100037


[I 2024-06-19 22:07:21,164] Trial 251 finished with value: 0.8429622054100037 and parameters: {'num_epochs': 13, 'dropout_rate': 0.23736713025790493, 'weight_decay': 0.05078464562578557, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.35243068981110715}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of 

Epoch 1/16


I0000 00:00:1718835114.794208     822 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9c3e48c4948963d4:0:0), session_name()
I0000 00:00:1718835162.256961     822 tpu_compile_op_common.cc:245] Compilation of 9c3e48c4948963d4:0:0 with session name  took 47.462670483s and succeeded
I0000 00:00:1718835162.497575     822 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9c3e48c4948963d4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8542858_1966485000754119932", property.function_library_fingerprint = 516238943304300998, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718835231.931482     860 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4ebe2b30460e1893:0:0), session_name()
I0000 00:00:1718835238.421998     860 tpu_compile_op_common.cc:245] Compilation of 4ebe2b30460e1893:0:0 with session name  took 6.490439408s and succeeded
I0000 00:00:1718835238.475809     860 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4ebe2b30460e1893:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8579386_10088539071674422476", property.function_library_fingerprint = 2628226445651047237, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718835245.138621     853 tpu_compile_op_common.cc:245] Compilation of c10ce8ce097c45e5:0:0 with session name  took 6.50577721s and succeeded
I0000 00:00:1718835245.184427     853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c10ce8ce097c45e5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8579386_10088539071674422476", property.function_library_fingerprint = 2628226445651047237, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718835245.184735     853 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
f1 score: 0.8249412775039673 and accuracy: 0.8025209903717041


[I 2024-06-19 22:15:02,387] Trial 257 finished with value: 0.8025209903717041 and parameters: {'num_epochs': 16, 'dropout_rate': 0.15000668867724543, 'weight_decay': 0.09337969407666942, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3789857966109872}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/18


I0000 00:00:1718835570.048783     870 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e06fea3f53236814:0:0), session_name()
I0000 00:00:1718835615.612962     870 tpu_compile_op_common.cc:245] Compilation of e06fea3f53236814:0:0 with session name  took 45.564110925s and succeeded
I0000 00:00:1718835615.826345     870 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e06fea3f53236814:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8743748_12703518192211275964", property.function_library_fingerprint = 13290677360679857470, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718835660.563550     793 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(729ec3544a897d66:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8743748_12703518192211275964", property.function_library_fingerprint = 13290677360679857470, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718835660.565398     793 tpu_compilation_cache_interface.cc:541] After adding entry for key 729ec3544a897d66:0:0 with session_name  cache is 216 entries (43343891330 bytes),  marked for eviction 174 entries (34979872677 bytes).
I0000 00:00:171



I0000 00:00:1718835701.250249     815 tpu_compile_op_common.cc:245] Compilation of 85745118850a0772:0:0 with session name  took 6.421049031s and succeeded
I0000 00:00:1718835701.301939     815 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(85745118850a0772:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8780276_11261846553324910671", property.function_library_fingerprint = 4225310663004459399, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718835701.302236     815 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
f1 score: 0.8236174583435059 and accuracy: 0.8251050710678101


[I 2024-06-19 22:22:44,594] Trial 266 finished with value: 0.8251050710678101 and parameters: {'num_epochs': 18, 'dropout_rate': 0.18019334943901733, 'weight_decay': 0.09706356199650215, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.26817153612529004}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/10


I0000 00:00:1718836034.585143     835 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(5622d7d0fe51565b:0:0), session_name()
I0000 00:00:1718836082.318358     835 tpu_compile_op_common.cc:245] Compilation of 5622d7d0fe51565b:0:0 with session name  took 47.733099551s and succeeded
I0000 00:00:1718836082.556611     835 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5622d7d0fe51565b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8947878_98749535320861931", property.function_library_fingerprint = 16535104415821703780, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718836126.260552     798 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c1dd7b796fb80fdb:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8947878_98749535320861931", property.function_library_fingerprint = 16535104415821703780, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718836126.262391     798 tpu_compilation_cache_interface.cc:541] After adding entry for key c1dd7b796fb80fdb:0:0 with session_name  cache is 220 entries (44119094710 bytes),  marked for eviction 178 entries (35877578892 bytes).
I0000 00:00:171883



I0000 00:00:1718836164.668581     791 tpu_compile_op_common.cc:245] Compilation of f8a32c36ed00cdd3:0:0 with session name  took 5.63794379s and succeeded
I0000 00:00:1718836164.711892     791 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f8a32c36ed00cdd3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_8984386_11445151953611699005", property.function_library_fingerprint = 9674500850346512048, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718836164.712133     791 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7142922878265381 and accuracy: 0.8392857313156128


[I 2024-06-19 22:30:01,263] Trial 273 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 10, 'dropout_rate': 0.3182900056027689, 'weight_decay': 0.020511020035474233, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.03371126394520492}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 m

Epoch 1/6


I0000 00:00:1718836470.359341     810 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b2acd8273e3b91a7:0:0), session_name()
I0000 00:00:1718836517.700039     810 tpu_compile_op_common.cc:245] Compilation of b2acd8273e3b91a7:0:0 with session name  took 47.340629947s and succeeded
I0000 00:00:1718836517.899649     810 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b2acd8273e3b91a7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9138954_4353938568221426450", property.function_library_fingerprint = 8689202306941104158, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718836561.561412     791 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f65e36c2312bcc32:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9138954_4353938568221426450", property.function_library_fingerprint = 8689202306941104158, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718836561.562620     791 tpu_compilation_cache_interface.cc:541] After adding entry for key f65e36c2312bcc32:0:0 with session_name  cache is 224 entries (44894712980 bytes),  marked for eviction 180 entries (36365698512 bytes).
I0000 00:00:17188



I0000 00:00:1718836600.021272     862 tpu_compile_op_common.cc:245] Compilation of 84f737d68fd36004:0:0 with session name  took 6.130601017s and succeeded
I0000 00:00:1718836600.061403     862 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(84f737d68fd36004:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9175524_5968881037633238880", property.function_library_fingerprint = 9441730097047037535, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718836600.061642     862 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
f1 score: 0.7090151906013489 and accuracy: 0.8413865566253662


[I 2024-06-19 22:37:00,029] Trial 281 finished with value: 0.8413865566253662 and parameters: {'num_epochs': 6, 'dropout_rate': 0.2406354883056938, 'weight_decay': 0.03249556515253729, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.1903177567379883}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the

Epoch 1/9


I0000 00:00:1718836898.887171     872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(df281e9b86a28294:0:0), session_name()
I0000 00:00:1718836946.524797     872 tpu_compile_op_common.cc:245] Compilation of df281e9b86a28294:0:0 with session name  took 47.637484283s and succeeded
I0000 00:00:1718836946.793839     872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(df281e9b86a28294:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9323254_3589214205287360153", property.function_library_fingerprint = 2443247885475175056, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718836993.703155     813 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ee7ddabe3491f981:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9323254_3589214205287360153", property.function_library_fingerprint = 2443247885475175056, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718836993.703916     813 tpu_compilation_cache_interface.cc:541] After adding entry for key ee7ddabe3491f981:0:0 with session_name  cache is 228 entries (45669916408 bytes),  marked for eviction 184 entries (37139922238 bytes).
I0000 00:00:17188



I0000 00:00:1718837035.674554     865 tpu_compile_op_common.cc:245] Compilation of 6d80b233f6dadbea:0:0 with session name  took 6.641810711s and succeeded
I0000 00:00:1718837035.723486     865 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6d80b233f6dadbea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9359762_1143041493884406873", property.function_library_fingerprint = 15149346022124932513, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718837035.723720     865 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7096564173698425 and accuracy: 0.8340336084365845


[I 2024-06-19 22:44:26,338] Trial 287 finished with value: 0.8340336084365845 and parameters: {'num_epochs': 9, 'dropout_rate': 0.29885257989867026, 'weight_decay': 9.858167254734066e-05, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.06810743233983359}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 

Epoch 1/8


I0000 00:00:1718837336.414356     844 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(be3dcb26d49c8a0e:0:0), session_name()
I0000 00:00:1718837383.957259     844 tpu_compile_op_common.cc:245] Compilation of be3dcb26d49c8a0e:0:0 with session name  took 47.542830707s and succeeded
I0000 00:00:1718837384.190217     844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(be3dcb26d49c8a0e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9512516_6857159378577173683", property.function_library_fingerprint = 8329990112594400161, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718837431.479757     813 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(90d1cd059cc35ac:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9512516_6857159378577173683", property.function_library_fingerprint = 8329990112594400161, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718837431.481488     813 tpu_compilation_cache_interface.cc:541] After adding entry for key 90d1cd059cc35ac:0:0 with session_name  cache is 232 entries (46445383146 bytes),  marked for eviction 188 entries (37915403578 bytes).
I0000 00:00:1718837



I0000 00:00:1718837470.688764     789 tpu_compile_op_common.cc:245] Compilation of a6cb66faa950390d:0:0 with session name  took 6.222544993s and succeeded
I0000 00:00:1718837470.740030     789 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a6cb66faa950390d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9549040_5825568673906245589", property.function_library_fingerprint = 4741443494563713761, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718837470.740373     789 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7083057761192322 and accuracy: 0.8272058963775635


[I 2024-06-19 22:51:39,832] Trial 296 finished with value: 0.8272058963775635 and parameters: {'num_epochs': 8, 'dropout_rate': 0.2886058397941223, 'weight_decay': 0.026165057619540407, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.27183331915431447}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/7


I0000 00:00:1718837772.689581     841 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(31d72a639e2b1179:0:0), session_name()
I0000 00:00:1718837821.428385     841 tpu_compile_op_common.cc:245] Compilation of 31d72a639e2b1179:0:0 with session name  took 48.738751194s and succeeded
I0000 00:00:1718837821.667311     841 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(31d72a639e2b1179:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9700154_1927466610250437209", property.function_library_fingerprint = 15697957187146000576, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718837869.198219     810 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(975ed874a72cb2be:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9700154_1927466610250437209", property.function_library_fingerprint = 15697957187146000576, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718837869.200451     810 tpu_compilation_cache_interface.cc:541] After adding entry for key 975ed874a72cb2be:0:0 with session_name  cache is 236 entries (47220864438 bytes),  marked for eviction 192 entries (38691022216 bytes).
I0000 00:00:1718



I0000 00:00:1718837908.170326     851 tpu_compile_op_common.cc:245] Compilation of 65777a231617a75b:0:0 with session name  took 5.992278843s and succeeded
I0000 00:00:1718837908.217180     851 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(65777a231617a75b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9736682_2541246129216970980", property.function_library_fingerprint = 13955032736090360354, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718837908.217476     851 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7048864364624023 and accuracy: 0.8361344337463379


[I 2024-06-19 22:58:53,526] Trial 302 finished with value: 0.8361344337463379 and parameters: {'num_epochs': 7, 'dropout_rate': 0.27325169818655787, 'weight_decay': 0.05471459015073156, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.5869573307008383}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/11


I0000 00:00:1718838202.794822     853 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ec610786cd2f6fa5:0:0), session_name()
I0000 00:00:1718838247.483598     853 tpu_compile_op_common.cc:245] Compilation of ec610786cd2f6fa5:0:0 with session name  took 44.688715535s and succeeded
I0000 00:00:1718838247.713335     853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec610786cd2f6fa5:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9886124_6034621695599963386", property.function_library_fingerprint = 9051626394500585058, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718838294.007003     824 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7145234511fecbdd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9886124_6034621695599963386", property.function_library_fingerprint = 9051626394500585058, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718838294.008030     824 tpu_compilation_cache_interface.cc:541] After adding entry for key 7145234511fecbdd:0:0 with session_name  cache is 240 entries (47996330856 bytes),  marked for eviction 196 entries (39466488954 bytes).
I0000 00:00:17188



I0000 00:00:1718838335.263123     845 tpu_compile_op_common.cc:245] Compilation of fcf8fc7548098569:0:0 with session name  took 6.245694945s and succeeded
I0000 00:00:1718838335.307665     845 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fcf8fc7548098569:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_9922648_6529616642959873883", property.function_library_fingerprint = 15917876316349738330, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718838335.307879     845 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7441150546073914 and accuracy: 0.8466386795043945


[I 2024-06-19 23:06:13,156] Trial 310 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 11, 'dropout_rate': 0.28267778341686156, 'weight_decay': 0.08013117649014169, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.2698855119391673}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/8


I0000 00:00:1718838647.467201     859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b426cea30ae21b08:0:0), session_name()
I0000 00:00:1718838696.126347     859 tpu_compile_op_common.cc:245] Compilation of b426cea30ae21b08:0:0 with session name  took 48.659039511s and succeeded
I0000 00:00:1718838696.376530     859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b426cea30ae21b08:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10078714_14157797107101001552", property.function_library_fingerprint = 18003168328329063941, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0



I0000 00:00:1718838738.429044     848 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2584115cf7094aea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10078714_14157797107101001552", property.function_library_fingerprint = 18003168328329063941, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718838738.430631     848 tpu_compilation_cache_interface.cc:541] After adding entry for key 2584115cf7094aea:0:0 with session_name  cache is 244 entries (48771797322 bytes),  marked for eviction 200 entries (40241692430 bytes).
I0000 00:00:17



I0000 00:00:1718838775.846376     816 tpu_compile_op_common.cc:245] Compilation of ec42cf2a1dcfe034:0:0 with session name  took 5.823123851s and succeeded
I0000 00:00:1718838775.893885     816 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ec42cf2a1dcfe034:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10115238_14273641460229913682", property.function_library_fingerprint = 1722033571380310657, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718838775.894113     816 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7021684646606445 and accuracy: 0.8440126180648804


[I 2024-06-19 23:13:25,378] Trial 316 finished with value: 0.8440126180648804 and parameters: {'num_epochs': 8, 'dropout_rate': 0.3066079599373859, 'weight_decay': 0.07231944566230136, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.31429922155084294}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/20


I0000 00:00:1718839079.472313     812 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(b55ec6e86129e228:0:0), session_name()
I0000 00:00:1718839128.134870     812 tpu_compile_op_common.cc:245] Compilation of b55ec6e86129e228:0:0 with session name  took 48.66250425s and succeeded
I0000 00:00:1718839128.372913     812 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(b55ec6e86129e228:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10266280_10719929548644132055", property.function_library_fingerprint = 14792154732348571384, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718839175.237057     794 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(fb26a2b8f46c83f9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10266280_10719929548644132055", property.function_library_fingerprint = 14792154732348571384, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718839175.239282     794 tpu_compilation_cache_interface.cc:541] After adding entry for key fb26a2b8f46c83f9:0:0 with session_name  cache is 248 entries (49547000846 bytes),  marked for eviction 204 entries (41017310700 bytes).
I0000 00:00:17



I0000 00:00:1718839214.828040     853 tpu_compile_op_common.cc:245] Compilation of 3ebca063cc8fb1d2:0:0 with session name  took 6.705057666s and succeeded
I0000 00:00:1718839214.879332     853 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3ebca063cc8fb1d2:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10302788_9802890916134217842", property.function_library_fingerprint = 9972578480936150643, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718839214.879666     853 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.746545672416687 and accuracy: 0.8398109078407288


[I 2024-06-19 23:21:28,211] Trial 324 finished with value: 0.8398109078407288 and parameters: {'num_epochs': 20, 'dropout_rate': 0.3373170022693093, 'weight_decay': 0.0757467090379526, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.24539759143494708}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/17


I0000 00:00:1718839562.760029     818 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e6d947554ca9688e:0:0), session_name()
I0000 00:00:1718839612.293207     818 tpu_compile_op_common.cc:245] Compilation of e6d947554ca9688e:0:0 with session name  took 49.533105478s and succeeded
I0000 00:00:1718839612.509012     818 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6d947554ca9688e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10473916_5800627375746795700", property.function_library_fingerprint = 18111385241730612308, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718839684.483814     859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4df83294c7f77e55:0:0), session_name()
I0000 00:00:1718839690.669577     859 tpu_compile_op_common.cc:245] Compilation of 4df83294c7f77e55:0:0 with session name  took 6.185667085s and succeeded
I0000 00:00:1718839690.732309     859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4df83294c7f77e55:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10510486_6800744118700260198", property.function_library_fingerprint = 1899629346157425757, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718839697.145780     782 tpu_compile_op_common.cc:245] Compilation of 509196063a536c7f:0:0 with session name  took 6.286128148s and succeeded
I0000 00:00:1718839697.199606     782 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(509196063a536c7f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10510486_6800744118700260198", property.function_library_fingerprint = 1899629346157425757, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718839697.199815     782 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/17
Epoch 3/17
Epoch 4/17
Epoch 5/17
Epoch 6/17
Epoch 7/17
Epoch 8/17
Epoch 9/17
Epoch 10/17
Epoch 11/17
Epoch 12/17
Epoch 13/17
Epoch 14/17
Epoch 15/17
Epoch 16/17
Epoch 17/17
f1 score: 0.7788925170898438 and accuracy: 0.832457959651947


[I 2024-06-19 23:29:17,520] Trial 331 finished with value: 0.832457959651947 and parameters: {'num_epochs': 17, 'dropout_rate': 0.28186414745777716, 'weight_decay': 0.016505496283772556, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.2362133903710074}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of t

Epoch 1/8


I0000 00:00:1718840027.102948     812 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(6149fe70c860183e:0:0), session_name()
I0000 00:00:1718840075.621094     812 tpu_compile_op_common.cc:245] Compilation of 6149fe70c860183e:0:0 with session name  took 48.518068912s and succeeded
I0000 00:00:1718840075.852203     812 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(6149fe70c860183e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10676432_5374689825138275607", property.function_library_fingerprint = 2555429530188876346, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718840122.900536     854 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9eb7db2f5b0ddffd:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10676432_5374689825138275607", property.function_library_fingerprint = 2555429530188876346, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718840122.901957     854 tpu_compilation_cache_interface.cc:541] After adding entry for key 9eb7db2f5b0ddffd:0:0 with session_name  cache is 256 entries (51097822640 bytes),  marked for eviction 212 entries (42568409990 bytes).
I0000 00:00:1718



I0000 00:00:1718840162.467776     782 tpu_compile_op_common.cc:245] Compilation of d8572fc05f9a1625:0:0 with session name  took 6.464993873s and succeeded
I0000 00:00:1718840162.519308     782 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(d8572fc05f9a1625:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10712940_12761028039765176004", property.function_library_fingerprint = 18403669184155254602, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718840162.519361     782 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.6574316620826721 and accuracy: 0.8329831957817078


[I 2024-06-19 23:36:31,388] Trial 339 finished with value: 0.8329831957817078 and parameters: {'num_epochs': 8, 'dropout_rate': 0.3494718234321891, 'weight_decay': 0.023230404371680714, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.3694058763969366}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/20


I0000 00:00:1718840463.052425     811 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(417367d4b28754cf:0:0), session_name()
I0000 00:00:1718840513.706579     811 tpu_compile_op_common.cc:245] Compilation of 417367d4b28754cf:0:0 with session name  took 50.65405925s and succeeded
I0000 00:00:1718840513.943605     811 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(417367d4b28754cf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10863982_16801722205404944769", property.function_library_fingerprint = 9818409824929846782, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718840581.384805     802 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9c8432a0dbb05647:0:0), session_name()
I0000 00:00:1718840587.549815     802 tpu_compile_op_common.cc:245] Compilation of 9c8432a0dbb05647:0:0 with session name  took 6.164910597s and succeeded
I0000 00:00:1718840587.596274     802 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9c8432a0dbb05647:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10900490_479128157101965970", property.function_library_fingerprint = 7685936193425037404, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, em



I0000 00:00:1718840594.346452     867 tpu_compile_op_common.cc:245] Compilation of aa58612b5f5e8817:0:0 with session name  took 6.622864362s and succeeded
I0000 00:00:1718840594.403014     867 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(aa58612b5f5e8817:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_10900490_479128157101965970", property.function_library_fingerprint = 7685936193425037404, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718840594.403537     867 tpu_compilation_cache_interface.cc:541] After adding entry for ke

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
f1 score: 0.8161771297454834 and accuracy: 0.8303571343421936


[I 2024-06-19 23:44:25,147] Trial 346 finished with value: 0.8303571343421936 and parameters: {'num_epochs': 20, 'dropout_rate': 0.19895519716200769, 'weight_decay': 0.08358774513194742, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.34438583037057313}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 m

Epoch 1/10


I0000 00:00:1718840940.875489     866 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4e225a0ac73910ed:0:0), session_name()
I0000 00:00:1718840991.007087     866 tpu_compile_op_common.cc:245] Compilation of 4e225a0ac73910ed:0:0 with session name  took 50.131529502s and succeeded
I0000 00:00:1718840991.211754     866 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4e225a0ac73910ed:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11071476_18437678866154739291", property.function_library_fingerprint = 8571088037201837355, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718841062.179787     807 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(476c5a03fc2d434f:0:0), session_name()
I0000 00:00:1718841069.356199     807 tpu_compile_op_common.cc:245] Compilation of 476c5a03fc2d434f:0:0 with session name  took 7.176357716s and succeeded
I0000 00:00:1718841069.413404     807 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(476c5a03fc2d434f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11108004_14477298869002116262", property.function_library_fingerprint = 14417067926387814799, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718841076.095264     824 tpu_compile_op_common.cc:245] Compilation of a234f36f7dbdd94d:0:0 with session name  took 6.561679112s and succeeded
I0000 00:00:1718841076.146519     824 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a234f36f7dbdd94d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11108004_14477298869002116262", property.function_library_fingerprint = 14417067926387814799, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718841076.146788     824 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.8021911978721619 and accuracy: 0.819327712059021


[I 2024-06-19 23:51:52,372] Trial 354 finished with value: 0.819327712059021 and parameters: {'num_epochs': 10, 'dropout_rate': 0.13777452188914788, 'weight_decay': 0.0885331894345489, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.281292576116935}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model T

Epoch 1/8


I0000 00:00:1718841386.368879     859 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(33c145e92f1f33e7:0:0), session_name()
I0000 00:00:1718841435.896134     859 tpu_compile_op_common.cc:245] Compilation of 33c145e92f1f33e7:0:0 with session name  took 49.527177617s and succeeded
I0000 00:00:1718841436.140647     859 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(33c145e92f1f33e7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11262430_9127867076939076854", property.function_library_fingerprint = 9268964817952086432, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718841508.998009     864 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2c4b72be8b158552:0:0), session_name()
I0000 00:00:1718841516.072922     864 tpu_compile_op_common.cc:245] Compilation of 2c4b72be8b158552:0:0 with session name  took 7.074854063s and succeeded
I0000 00:00:1718841516.131359     864 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2c4b72be8b158552:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11298958_17931066313201876011", property.function_library_fingerprint = 259316352550377104, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718841522.551918     809 tpu_compile_op_common.cc:245] Compilation of 8ce7e0b08a29af8e:0:0 with session name  took 6.296826354s and succeeded
I0000 00:00:1718841522.598969     809 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(8ce7e0b08a29af8e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11298958_17931066313201876011", property.function_library_fingerprint = 259316352550377104, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718841522.599194     809 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7453920841217041 and accuracy: 0.8392857313156128


[I 2024-06-19 23:59:09,639] Trial 361 finished with value: 0.8392857313156128 and parameters: {'num_epochs': 8, 'dropout_rate': 0.21633609390895492, 'weight_decay': 0.07028994503501176, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.7603896181175805}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/10


I0000 00:00:1718841816.589318     831 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(71583d0cce1fc2e6:0:0), session_name()
I0000 00:00:1718841865.776954     831 tpu_compile_op_common.cc:245] Compilation of 71583d0cce1fc2e6:0:0 with session name  took 49.187571009s and succeeded
I0000 00:00:1718841865.997511     831 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(71583d0cce1fc2e6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11450072_1522931449234473803", property.function_library_fingerprint = 16452884578731459393, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718841913.680472     849 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(7db0d7fc7e6cdb8f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11450072_1522931449234473803", property.function_library_fingerprint = 16452884578731459393, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718841913.681209     849 tpu_compilation_cache_interface.cc:541] After adding entry for key 7db0d7fc7e6cdb8f:0:0 with session_name  cache is 272 entries (54199469960 bytes),  marked for eviction 228 entries (45669916408 bytes).
I0000 00:00:171



I0000 00:00:1718841952.674590     843 tpu_compile_op_common.cc:245] Compilation of 24f2e23e5c1ed271:0:0 with session name  took 5.792276312s and succeeded
I0000 00:00:1718841952.718935     843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(24f2e23e5c1ed271:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11486600_1926460604168596587", property.function_library_fingerprint = 11737830930561656385, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718841952.719155     843 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7786744832992554 and accuracy: 0.8466386795043945


[I 2024-06-20 00:06:28,581] Trial 369 finished with value: 0.8466386795043945 and parameters: {'num_epochs': 10, 'dropout_rate': 0.21483475771340282, 'weight_decay': 0.049171233662495914, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.3580614581898307}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/16


I0000 00:00:1718842262.093495     821 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(4590679ce1d58bc8:0:0), session_name()
I0000 00:00:1718842310.815824     821 tpu_compile_op_common.cc:245] Compilation of 4590679ce1d58bc8:0:0 with session name  took 48.722252863s and succeeded
I0000 00:00:1718842311.059162     821 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4590679ce1d58bc8:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11641026_11779138715211205636", property.function_library_fingerprint = 2801494206389196549, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718842384.622980     816 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(58967943911adf95:0:0), session_name()
I0000 00:00:1718842391.389413     816 tpu_compile_op_common.cc:245] Compilation of 58967943911adf95:0:0 with session name  took 6.766382699s and succeeded
I0000 00:00:1718842391.454424     816 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(58967943911adf95:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11677554_12646212738922020469", property.function_library_fingerprint = 10702815508403203222, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718842397.984143     790 tpu_compile_op_common.cc:245] Compilation of a2133d16969ec5aa:0:0 with session name  took 6.401847608s and succeeded
I0000 00:00:1718842398.029505     790 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a2133d16969ec5aa:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11677554_12646212738922020469", property.function_library_fingerprint = 10702815508403203222, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718842398.029755     790 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
f1 score: 0.7923865914344788 and accuracy: 0.8277310729026794


[I 2024-06-20 00:14:14,690] Trial 376 finished with value: 0.8277310729026794 and parameters: {'num_epochs': 16, 'dropout_rate': 0.22953037060661066, 'weight_decay': 0.04424198121347369, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.40602339159086354}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/10


I0000 00:00:1718842727.120651     843 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(32341544e6d07507:0:0), session_name()
I0000 00:00:1718842776.117856     843 tpu_compile_op_common.cc:245] Compilation of 32341544e6d07507:0:0 with session name  took 48.997139994s and succeeded
I0000 00:00:1718842776.355178     843 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(32341544e6d07507:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11841916_3706989016287294739", property.function_library_fingerprint = 11807211141450354494, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718842823.575989     838 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f4d29a891c591f3:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11841916_3706989016287294739", property.function_library_fingerprint = 11807211141450354494, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718842823.577464     838 tpu_compilation_cache_interface.cc:541] After adding entry for key f4d29a891c591f3:0:0 with session_name  cache is 280 entries (55750432368 bytes),  marked for eviction 236 entries (47220864438 bytes).
I0000 00:00:17188



I0000 00:00:1718842863.820099     814 tpu_compile_op_common.cc:245] Compilation of 28edd86ff8a44193:0:0 with session name  took 6.236423332s and succeeded
I0000 00:00:1718842863.863518     814 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(28edd86ff8a44193:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_11878444_1295277455082565954", property.function_library_fingerprint = 17579120091080112780, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718842863.863724     814 tpu_compilation_cache_interface.cc:541] After adding entry for 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7621287703514099 and accuracy: 0.831932783126831


[I 2024-06-20 00:21:38,299] Trial 384 finished with value: 0.831932783126831 and parameters: {'num_epochs': 10, 'dropout_rate': 0.23544701702689008, 'weight_decay': 0.06444791855404933, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.47455140162514864}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/9


I0000 00:00:1718843170.816868     804 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(9110a7e5e88fe4ee:0:0), session_name()
I0000 00:00:1718843217.056120     804 tpu_compile_op_common.cc:245] Compilation of 9110a7e5e88fe4ee:0:0 with session name  took 46.239160456s and succeeded
I0000 00:00:1718843217.292189     804 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9110a7e5e88fe4ee:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12032870_8213834058435048089", property.function_library_fingerprint = 16044904900855836055, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718843260.829534     804 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(93213bc6f959459e:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12032870_8213834058435048089", property.function_library_fingerprint = 16044904900855836055, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718843260.831200     804 tpu_compilation_cache_interface.cc:541] After adding entry for key 93213bc6f959459e:0:0 with session_name  cache is 284 entries (56525913388 bytes),  marked for eviction 240 entries (47996330856 bytes).
I0000 00:00:171



I0000 00:00:1718843299.672117     834 tpu_compile_op_common.cc:245] Compilation of db3af6585a302820:0:0 with session name  took 6.418108264s and succeeded
I0000 00:00:1718843299.725689     834 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db3af6585a302820:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12069398_8832295175160331341", property.function_library_fingerprint = 8611032785055365246, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718843299.726074     834 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
f1 score: 0.7456743717193604 and accuracy: 0.8335084319114685


[I 2024-06-20 00:28:50,323] Trial 392 finished with value: 0.8335084319114685 and parameters: {'num_epochs': 9, 'dropout_rate': 0.24927992744328487, 'weight_decay': 0.06178989235790492, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.46990840786119875}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/10


I0000 00:00:1718843602.251537     833 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(2e21018183065b7f:0:0), session_name()
I0000 00:00:1718843650.757394     833 tpu_compile_op_common.cc:245] Compilation of 2e21018183065b7f:0:0 with session name  took 48.50580796s and succeeded
I0000 00:00:1718843650.973845     833 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(2e21018183065b7f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12222152_17895205518176914221", property.function_library_fingerprint = 7668168297615687569, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718843700.073464     797 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(afcb04015dde63d6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12222152_17895205518176914221", property.function_library_fingerprint = 7668168297615687569, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718843700.075014     797 tpu_compilation_cache_interface.cc:541] After adding entry for key afcb04015dde63d6:0:0 with session_name  cache is 288 entries (57301379854 bytes),  marked for eviction 244 entries (48771797322 bytes).
I0000 00:00:171



I0000 00:00:1718843740.008682     816 tpu_compile_op_common.cc:245] Compilation of f09816c71b9c3cc7:0:0 with session name  took 6.426150033s and succeeded
I0000 00:00:1718843740.050935     816 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(f09816c71b9c3cc7:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12258676_6532637069689029906", property.function_library_fingerprint = 6509555320573467985, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718843740.051163     816 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7352162599563599 and accuracy: 0.8429622054100037


[I 2024-06-20 00:36:16,992] Trial 399 finished with value: 0.8429622054100037 and parameters: {'num_epochs': 10, 'dropout_rate': 0.29190941843818075, 'weight_decay': 0.07572748544646404, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.7476574456662404}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/10


I0000 00:00:1718844051.859613     824 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(91901ea2f3d864b9:0:0), session_name()
I0000 00:00:1718844095.989162     824 tpu_compile_op_common.cc:245] Compilation of 91901ea2f3d864b9:0:0 with session name  took 44.129480251s and succeeded
I0000 00:00:1718844096.177731     824 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(91901ea2f3d864b9:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12413030_2767787830421563299", property.function_library_fingerprint = 13119868281722897230, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718844139.144448     840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a900d4e34bc0f8ec:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12413030_2767787830421563299", property.function_library_fingerprint = 13119868281722897230, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718844139.145644     840 tpu_compilation_cache_interface.cc:541] After adding entry for key a900d4e34bc0f8ec:0:0 with session_name  cache is 292 entries (58076583330 bytes),  marked for eviction 248 entries (49547000846 bytes).
I0000 00:00:171



I0000 00:00:1718844177.598644     861 tpu_compile_op_common.cc:245] Compilation of 9a7174d38e1fa48a:0:0 with session name  took 5.928257478s and succeeded
I0000 00:00:1718844177.638132     861 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(9a7174d38e1fa48a:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12449538_9819482900397947453", property.function_library_fingerprint = 3468933647586518580, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718844177.638445     861 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
f1 score: 0.7521538734436035 and accuracy: 0.841911792755127


[I 2024-06-20 00:43:33,436] Trial 406 finished with value: 0.841911792755127 and parameters: {'num_epochs': 10, 'dropout_rate': 0.2592255510608093, 'weight_decay': 0.07465590281365798, 'lr_scheduler_type': 'constant', 'gradient_clip_norm': 0.38726886672643324}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/12


I0000 00:00:1718844483.280880     829 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(a5dd8c7164b0031b:0:0), session_name()
I0000 00:00:1718844529.751615     829 tpu_compile_op_common.cc:245] Compilation of a5dd8c7164b0031b:0:0 with session name  took 46.470623057s and succeeded
I0000 00:00:1718844529.942756     829 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(a5dd8c7164b0031b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12603948_989549812062575865", property.function_library_fingerprint = 14194809176995647255, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0, 



I0000 00:00:1718844601.640823     807 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(18366008f4b250ea:0:0), session_name()
I0000 00:00:1718844608.264843     807 tpu_compile_op_common.cc:245] Compilation of 18366008f4b250ea:0:0 with session name  took 6.623951018s and succeeded
I0000 00:00:1718844608.321246     807 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(18366008f4b250ea:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12640472_17861961265513531548", property.function_library_fingerprint = 12498963429051029785, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718844615.579492     811 tpu_compile_op_common.cc:245] Compilation of 5c5f12cb96b60da6:0:0 with session name  took 7.118676055s and succeeded
I0000 00:00:1718844615.631977     811 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(5c5f12cb96b60da6:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12640472_17861961265513531548", property.function_library_fingerprint = 12498963429051029785, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718844615.632829     811 tpu_compilation_cache_interface.cc:541] After adding entry for

Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
f1 score: 0.7712225317955017 and accuracy: 0.832457959651947


[I 2024-06-20 00:50:58,771] Trial 414 finished with value: 0.832457959651947 and parameters: {'num_epochs': 12, 'dropout_rate': 0.23638129959002988, 'weight_decay': 0.05906853967553024, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.5283703717487076}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model

Epoch 1/8


I0000 00:00:1718844929.839516     857 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(3364480a56eb7bb4:0:0), session_name()
I0000 00:00:1718844980.492213     857 tpu_compile_op_common.cc:245] Compilation of 3364480a56eb7bb4:0:0 with session name  took 50.652640249s and succeeded
I0000 00:00:1718844980.726118     857 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(3364480a56eb7bb4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12798210_14178776600371385670", property.function_library_fingerprint = 16441126506368043596, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0



I0000 00:00:1718845028.061472     871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(4977b9b4e8b9410b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12798210_14178776600371385670", property.function_library_fingerprint = 16441126506368043596, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718845028.063333     871 tpu_compilation_cache_interface.cc:541] After adding entry for key 4977b9b4e8b9410b:0:0 with session_name  cache is 300 entries (59627531136 bytes),  marked for eviction 256 entries (51097822640 bytes).
I0000 00:00:17



I0000 00:00:1718845068.541637     844 tpu_compile_op_common.cc:245] Compilation of c37f006fb3339f1b:0:0 with session name  took 6.000249931s and succeeded
I0000 00:00:1718845068.590417     844 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c37f006fb3339f1b:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12834738_1260941960090908538", property.function_library_fingerprint = 1941883687951434289, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718845068.590673     844 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
f1 score: 0.7483506798744202 and accuracy: 0.8335084319114685


[I 2024-06-20 00:58:15,395] Trial 422 finished with value: 0.8335084319114685 and parameters: {'num_epochs': 8, 'dropout_rate': 0.20169434941598735, 'weight_decay': 0.0033150734342571138, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.8885101609744456}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mod

Epoch 1/11


I0000 00:00:1718845365.177103     798 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(db5398996960d0ef:0:0), session_name()
I0000 00:00:1718845413.248660     798 tpu_compile_op_common.cc:245] Compilation of db5398996960d0ef:0:0 with session name  took 48.071473595s and succeeded
I0000 00:00:1718845413.470011     798 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(db5398996960d0ef:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_12985836_15045937113496936261", property.function_library_fingerprint = 4305601379717011605, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718845482.773166     872 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(e5486f5bb10a237f:0:0), session_name()
I0000 00:00:1718845489.492184     872 tpu_compile_op_common.cc:245] Compilation of e5486f5bb10a237f:0:0 with session name  took 6.718975059s and succeeded
I0000 00:00:1718845489.553212     872 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e5486f5bb10a237f:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_13022360_8064342593582849731", property.function_library_fingerprint = 6985433751600923629, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,49,;32,49,;32,;", property.guaranteed_constants_size = 0, e



I0000 00:00:1718845496.019846     871 tpu_compile_op_common.cc:245] Compilation of c8c7cafb99347ee4:0:0 with session name  took 6.327985962s and succeeded
I0000 00:00:1718845496.066827     871 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(c8c7cafb99347ee4:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_13022360_8064342593582849731", property.function_library_fingerprint = 6985433751600923629, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718845496.067128     871 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
f1 score: 0.7796976566314697 and accuracy: 0.8450630307197571


[I 2024-06-20 01:05:35,807] Trial 429 finished with value: 0.8450630307197571 and parameters: {'num_epochs': 11, 'dropout_rate': 0.21750662197207313, 'weight_decay': 0.06530200557061229, 'lr_scheduler_type': 'linear', 'gradient_clip_norm': 0.4481424864237451}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 mode

Epoch 1/7


I0000 00:00:1718845808.937694     809 tpu_compilation_cache_interface.cc:441] TPU host compilation cache miss: cache_key(ebbdf02866c11717:0:0), session_name()
I0000 00:00:1718845856.821317     809 tpu_compile_op_common.cc:245] Compilation of ebbdf02866c11717:0:0 with session name  took 47.883556267s and succeeded
I0000 00:00:1718845857.067160     809 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(ebbdf02866c11717:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_13178584_17373061355195724795", property.function_library_fingerprint = 5164768636356006386, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "32,63,;32,63,;32,;", property.guaranteed_constants_size = 0,



I0000 00:00:1718845900.495601     840 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e2adda239616aeaf:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_13178584_17373061355195724795", property.function_library_fingerprint = 5164768636356006386, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "10,63,;10,63,;10,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718845900.496783     840 tpu_compilation_cache_interface.cc:541] After adding entry for key e2adda239616aeaf:0:0 with session_name  cache is 308 entries (61178617480 bytes),  marked for eviction 264 entries (52648507824 bytes).
I0000 00:00:171



I0000 00:00:1718845938.417801     856 tpu_compile_op_common.cc:245] Compilation of e6eedd1952d8f32d:0:0 with session name  took 5.749166764s and succeeded
I0000 00:00:1718845938.460187     856 tpu_compilation_cache_interface.cc:475] TPU host compilation cache: compilation complete for cache_key(e6eedd1952d8f32d:0:0), session_name(), subgraph_key(std::string(property.function_name) = "while/cluster_while_body_13215154_2768525473601369083", property.function_library_fingerprint = 4515726912554891041, property.mlir_module_fingerprint = 0, property.num_replicas = 8, topology.chip_bounds().x = 2, topology.chip_bounds().y = 2, topology.chip_bounds().z = 1, topology.wrap().x = false, topology.wrap().y = false, topology.wrap().z = false, std::string(property.shapes_prefix) = "14,49,;14,49,;14,;", property.guaranteed_constants_size = 0, embedding_partitions_fingerprint = "1688352644216761960")
I0000 00:00:1718845938.460457     856 tpu_compilation_cache_interface.cc:541] After adding entry for k

Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
f1 score: 0.7346979975700378 and accuracy: 0.8434873819351196


[I 2024-06-20 01:12:44,031] Trial 437 finished with value: 0.8434873819351196 and parameters: {'num_epochs': 7, 'dropout_rate': 0.22797817897334094, 'weight_decay': 0.07312578454455873, 'lr_scheduler_type': 'cosine_with_restarts', 'gradient_clip_norm': 0.5869347698155665}. Best is trial 138 with value: 0.8539915680885315.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForSequenceClassification: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of th

Epoch 1/6


[W 2024-06-20 01:14:50,087] Trial 443 failed with parameters: {'num_epochs': 6, 'dropout_rate': 0.2690265784735457, 'weight_decay': 0.07234093015580796, 'lr_scheduler_type': 'cosine', 'gradient_clip_norm': 0.324392402135071} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_30/1187369262.py", line 235, in objective
    model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=1)
  File "/usr/local/lib/python3.10/site-packages/transformers/modeling_tf_utils.py", line 1229, in fit
    return super().fit(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 1804, in fit
    tmp_logs 

KeyboardInterrupt: 