In [None]:
# ============== requirements =============
import tensorflow as tf
import pandas as pd
import numpy as np

from transformers import BertTokenizer, TFBertForSequenceClassification, AdamW

# Record which kind of device TensorFlow can see: 'gpu' when at least one
# physical GPU is listed, otherwise 'cpu'.
device = 'gpu' if tf.config.list_physical_devices('GPU') else 'cpu'

In [None]:
#@title Load and prepare the train/test data
TRAIN_CSV = r"/content/drive/MyDrive/Colab Notebooks/data/train.csv"
TEST_CSV = r"/content/drive/MyDrive/Colab Notebooks/data/test.csv"

# Raw CSV column names -> names used in the rest of the notebook.
COLUMN_NAMES = {
    'Class Index': 'label',
    'Title': 'title',
    'Description': 'text',
}
# The CSV labels are 1-based; shift them to 0..3 for the classifier.
LABEL_MAP = {1: 0, 2: 1, 3: 2, 4: 3}
SAMPLE_FRAC = 0.05  # fraction of each split that is kept
SAMPLE_SEED = 0     # fixed seed so the sub-sample is reproducible


def load_split(csv_path):
    """Read one CSV split and return the cleaned, sub-sampled frame.

    The same steps are applied to every split: rename the columns, drop the
    title, keep a seeded random sample of the rows and remap the labels.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A new DataFrame with the columns 'label' and 'text'.
    """
    return (
        pd.read_csv(csv_path)
        .rename(columns=COLUMN_NAMES)
        .drop(columns='title')
        .sample(frac=SAMPLE_FRAC, random_state=SAMPLE_SEED)
        # Build a new column instead of calling replace(inplace=True) on a
        # column selection: that form acts on a temporary object and is not
        # guaranteed to update the frame (pandas Copy-on-Write).
        .assign(label=lambda frame: frame['label'].replace(LABEL_MAP))
    )


train_data = load_split(TRAIN_CSV)
test_data = load_split(TEST_CSV)

x_train = train_data['text'].tolist()
y_train = train_data['label'].tolist()

x_test = test_data['text'].tolist()
y_test = test_data['label'].tolist()

# Class balance of the sampled training split.
train_data['label'].value_counts()

In [None]:
# Define the model name and number of labels
MODEL_NAME = 'bert-base-uncased'
num_labels = 4

# Load the pre-trained BERT model, configured for `num_labels` output classes
model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels)

# Load the tokenizer from the SAME checkpoint as the model. The token ids must
# come from the vocabulary the pretrained embeddings were trained with; the
# cased and uncased checkpoints use different vocabularies.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Tokenize both splits into padded TensorFlow tensors
train_encodings = tokenizer(x_train, truncation=True, padding=True, return_tensors='tf')
val_encodings = tokenizer(x_test, truncation=True, padding=True, return_tensors='tf')

# one hot encoding
train_labels = tf.keras.utils.to_categorical(y_train, num_classes=num_labels)
val_labels = tf.keras.utils.to_categorical(y_test, num_classes=num_labels)

In [None]:
# Pair the tokenizer outputs (as plain dicts) with the one-hot labels so each
# dataset element is a (features, label) tuple.
train_features = dict(train_encodings)
val_features = dict(val_encodings)

train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_features, val_labels))


In [None]:
# Training setup: Adam optimizer, categorical cross-entropy computed on the
# raw logits (from_logits=True), and categorical accuracy reported under the
# name 'accuracy'.
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.CategoricalAccuracy(name='accuracy')

model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=[metric],
)

In [None]:
# Fine-tune the classifier for three epochs.
BATCH_SIZE = 16

# Only the training data is shuffled; the order of the validation examples
# does not change the validation metrics.
train_batches = train_dataset.shuffle(len(train_labels)).batch(BATCH_SIZE)
val_batches = val_dataset.batch(BATCH_SIZE)

# The datasets are already batched, so `batch_size` must not be passed to fit().
history = model.fit(train_batches,
                    epochs=3,
                    validation_data=val_batches)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Score the model on the held-out split, fed in batches of 16.
val_eval_batches = val_dataset.batch(16)
model.evaluate(val_eval_batches)




[0.6672008037567139, 0.7631579041481018]

In [None]:
# Persist the fine-tuned model and its tokenizer to the same Drive folder the
# notebook later reloads them from, so a fresh "Run all" loads the model that
# was just trained instead of depending on a manually copied directory.
SAVE_DIR = r"/content/drive/MyDrive/Colab Notebooks/tf_bert_classifier"

model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)


('tf_bert_classifier/tokenizer_config.json',
 'tf_bert_classifier/special_tokens_map.json',
 'tf_bert_classifier/vocab.txt',
 'tf_bert_classifier/added_tokens.json')

In [None]:
# Reload the saved classifier and tokenizer from the Drive folder.
saved_model_dir = r"/content/drive/MyDrive/Colab Notebooks/tf_bert_classifier"

modelT = TFBertForSequenceClassification.from_pretrained(saved_model_dir)
tokenizerT = BertTokenizer.from_pretrained(saved_model_dir)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at /content/drive/MyDrive/Colab Notebooks/tf_bert_classifier.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [None]:
# Unseen news snippet used to try out the reloaded classifier.
new_text = '''
Moments into her opening bout at the 2023 IBA Women’s World Championships, Lovlina Borgohain charged at her opponent, Mexico’s Vanessa Ortiz, and landed a big right hook to the face. That was, perhaps, the one and only effective aggressive move she executed, managing the rest of the 5-0 unanimous win by staying on the defensive.

This time last year, Lovlina found out that the 70kg category in which she won the bronze medal at the Tokyo Olympics is scrapped for Paris 2024. She chose to gain weight to go up to the 75kg category, as opposed to cutting weight to make it to 66kg. The choice, given her tall frame, made sense, and the ease with which she got past Ortiz to reach the quarterfinals, without too many aggressive moves, is vindication of that choice.

'''

# Turn the snippet into token ids with the reloaded tokenizer.
encode_options = {'truncation': True, 'padding': True}
new_encodings = tokenizerT.encode(new_text, **encode_options)




In [None]:
# Number of token ids produced for the sample text.
token_count = len(new_encodings)
token_count

180

In [None]:
# Wrap the single encoded example in a list so it is passed as a batch of one.
single_example_batch = [new_encodings]
new_predictions = modelT.predict(single_example_batch)

# The predicted class is the index of the largest logit.
logits = new_predictions.logits
new_label = tf.argmax(logits, axis=-1)
new_label.numpy()[0]



2

In [None]:
import gradio as gr

# Callback for the demo interface: adds its two inputs
def add_numbers(num1, num2):
    """Return the sum of `num1` and `num2`."""
    total = num1 + num2
    return total

# Describe the demo UI: two numeric inputs wired to add_numbers and one
# numeric output.
interface_options = dict(
    fn=add_numbers,
    inputs=["number", "number"],
    outputs="number",
    title="Add Two Numbers",
    description="This app adds two numbers.",
)
iface = gr.Interface(**interface_options)

# Start the app; share=True asks Gradio for a public link.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://21c8270eca2ba2f708.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


