In [1]:
import tensorflow as tf
import datasets
import transformers
import numpy as np

In [2]:
# Report whether TensorFlow can see at least one GPU device.
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices is
# the replacement the deprecation notice itself recommends.
print(bool(tf.config.list_physical_devices("GPU")))

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Hub id of the pretrained checkpoint; the tokenizer below is loaded from it.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# One local JSON file per dataset split.
split_files = {
    "train": "mesh_train.json",
    "validation": "mesh_val.json",
    "test": "mesh_test.json",
}
raw_datasets = load_dataset("json", data_files=split_files)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the ``"text"`` field of ``example`` with the module-level tokenizer.

    Args:
        example: Mapping that has a ``"text"`` entry (a single string or a list
            of strings when used with ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer's output for that text. No padding is applied here.
    """
    # truncation=True clips each sequence to the tokenizer's maximum length so
    # an over-long text cannot exceed what the model accepts as input.
    return tokenizer(example["text"], truncation=True)


# Tokenize every split in batches. Padding is not done here; it is left to the
# collator below, which pads per batch.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Puts the samples of a batch together, pads them and returns tf.Tensor objects.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

# Feed the model exactly the fields this tokenizer produces instead of a
# hard-coded list: a DistilBERT tokenizer emits no "token_type_ids", so asking
# for that column is at best ignored.
# NOTE(review): some `datasets` versions may reject a missing column - confirm.
model_input_columns = list(tokenizer.model_input_names)


def _split_to_tf_dataset(split_name, shuffle, batch_size=2):
    """Convert one tokenized split into a batched ``tf.data.Dataset``.

    Args:
        split_name: Key of the split in ``tokenized_datasets``
            (``"train"``, ``"validation"`` or ``"test"``).
        shuffle: Whether the split is shuffled.
        batch_size: Number of samples per batch.

    Returns:
        The result of ``to_tf_dataset`` for that split, using the shared
        collator and ``"label"`` as the label column.
    """
    return tokenized_datasets[split_name].to_tf_dataset(
        columns=model_input_columns,
        label_cols=["label"],
        shuffle=shuffle,
        collate_fn=data_collator,
        batch_size=batch_size,
    )


# Only the training split is shuffled; evaluation splits keep their order.
tf_train_dataset = _split_to_tf_dataset("train", shuffle=True)
tf_validation_dataset = _split_to_tf_dataset("validation", shuffle=False)
tf_test_dataset = _split_to_tf_dataset("test", shuffle=False)

Using custom data configuration default-4c8aaf27dd61153a
Reusing dataset json (C:\Users\13056\.cache\huggingface\datasets\json\default-4c8aaf27dd61153a\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at C:\Users\13056\.cache\huggingface\datasets\json\default-4c8aaf27dd61153a\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-ae28e50c738abef3.arrow
Loading cached processed dataset at C:\Users\13056\.cache\huggingface\datasets\json\default-4c8aaf27dd61153a\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-7d430baba4f28a92.arrow
Loading cached processed dataset at C:\Users\13056\.cache\huggingface\datasets\json\default-4c8aaf27dd61153a\0.0.0\ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b\cache-526a04c62b1e65c6.arrow


In [4]:
from transformers import TFAutoModelForSequenceClassification

# Sequence-classification model with a two-label head, loaded from the same
# checkpoint id that the tokenizer was loaded from.
model = TFAutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [5]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
# from_logits=True: the loss is told that the model outputs are raw logits,
# not probabilities.
loss_fn = SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)

model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

# Checkpoint the weights at the end of an epoch, but only when validation
# accuracy improves. Without save_best_only=True the `monitor`/`mode` settings
# have no effect and every epoch overwrites the same file, so the checkpoint
# would always hold the last epoch rather than the best one.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="mesh_model_checkpoint",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)


In [6]:
# Train for 5 epochs, validating after each one. The returned History object is
# kept in `history` so the per-epoch metrics stay available to later cells
# (via history.history) instead of being discarded as a bare repr.
history = model.fit(
    tf_train_dataset,
    epochs=5,
    validation_data=tf_validation_dataset,
    callbacks=[model_checkpoint_callback],
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1f6c1bf7c40>