In [None]:
# Install dependencies
!pip install numpy
!pip install pandas
!pip install tensorflow
!pip install scikit-learn
!pip install transformers

In [9]:
import tensorflow as tf

# Ask TensorFlow once for the physical GPUs it can see, then report both the
# count and the raw device list.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
print(gpu_devices)

# A falsy device name takes the "no GPU" branch; otherwise the name is printed.
default_gpu_name = tf.test.gpu_device_name()
if not default_gpu_name:
    print("Please install TensorFlow with GPU support.")
else:
    print('Default GPU Device Details: {}'.format(default_gpu_name))

Num GPUs Available:  0
[]
Please install TensorFlow with GPU support.


In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizerFast, TFBertForSequenceClassification

# Fine-tune a pre-trained BERT sequence classifier on the tweet dataset.
#
# Settings a reader might want to tune, gathered in one place.
SEED = 42                      # fixes the split, the shuffle order and TF-level randomness
DATA_PATH = '../data/training.csv'
TEXT_COL = 'tweet_text'
LABEL_COL = 'cyberbullying_type'
PRETRAINED_NAME = 'bert-base-uncased'
MAX_LENGTH = 512               # upper bound on tokens per text; longer texts are truncated
VAL_FRACTION = 0.2
TRAIN_BATCH_SIZE = 8
VAL_BATCH_SIZE = 32
LEARNING_RATE = 5e-5
EPOCHS = 3

# Seed TensorFlow so that runs are repeatable as far as TensorFlow allows.
tf.random.set_seed(SEED)

# Load the dataset
df = pd.read_csv(DATA_PATH)

# Rows without a text or a label cannot be tokenized / encoded, so drop them
# up front instead of failing later inside the tokenizer or the encoder.
df = df.dropna(subset=[TEXT_COL, LABEL_COL]).reset_index(drop=True)
df[TEXT_COL] = df[TEXT_COL].astype(str)

# Encode labels (string classes -> integer ids; `le.classes_` keeps the mapping)
le = LabelEncoder()
df[LABEL_COL] = le.fit_transform(df[LABEL_COL])

# Split into training and validation sets.
# `random_state` makes the split reproducible; `stratify` keeps the class
# proportions the same in both subsets.
all_texts = df[TEXT_COL].tolist()
all_labels = df[LABEL_COL].tolist()
train_texts, val_texts, train_labels, val_labels = train_test_split(
    all_texts,
    all_labels,
    test_size=VAL_FRACTION,
    random_state=SEED,
    stratify=all_labels,
)

# Initialize the BERT tokenizer
print("Tokenizing")
tokenizer = BertTokenizerFast.from_pretrained(PRETRAINED_NAME)

# Tokenize the texts
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=MAX_LENGTH)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=MAX_LENGTH)

# Prepare TensorFlow datasets.
# The shuffle buffer covers the whole training set: a buffer smaller than the
# dataset only shuffles locally within a sliding window.
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_labels
)).shuffle(len(train_texts), seed=SEED).batch(TRAIN_BATCH_SIZE)
val_dataset = tf.data.Dataset.from_tensor_slices((
    dict(val_encodings),
    val_labels
)).batch(VAL_BATCH_SIZE)

# Load pre-trained BERT model with one output per encoded class
model = TFBertForSequenceClassification.from_pretrained(PRETRAINED_NAME, num_labels=len(le.classes_))

# Define optimizer, loss, and metrics.
# `from_logits=True` because the loss is fed the model's raw, un-normalized scores.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = ['accuracy']

# Compile the model
print("Compiling...")
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Train the model
print("Training...")
history = model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS)


In [None]:
# Persist the model trained in the previous cell under ./tf_saved_model
# (path is relative to the notebook's working directory).
model.save(filepath='./tf_saved_model')