In [4]:

import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, TFBertForSequenceClassification
import tensorflow as tf
import numpy as np

# Load the data from "merged_training.pkl"
df = pd.read_pickle("merged_training.pkl")

# Encode the emotion labels using LabelEncoder
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
df['emotions_encoded'] = label_encoder.fit_transform(df['emotions'])

# Sample a smaller subset of the data for faster experimentation
df = df.sample(frac=0.1, random_state=42)  # Adjust the fraction as needed

# Split the data into train and test sets
X = df['text']
y = df['emotions_encoded']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the list of emotion labels for later decoding
emotions = list(label_encoder.classes_)


BERT WITH REDUCED EPOCH,BATCH SIZE & LEARNING RATE

In [15]:
# Initialize the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the text data with a reduced maximum sequence length
max_length = 64  # Adjust the maximum sequence length
X_train_tokens = tokenizer(list(X_train), padding=True, truncation=True, max_length=max_length, return_tensors='tf')
X_test_tokens = tokenizer(list(X_test), padding=True, truncation=True, max_length=max_length, return_tensors='tf')

# Convert the emotion labels to categorical format
num_classes = len(emotions)

# Load the pre-trained BERT model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_classes)

# Compile the model with a reduced learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)  # Adjust the learning rate
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# Train the model with reduced epochs and batch size
history = model.fit(
    X_train_tokens.data,
    y_train,
    validation_data=(X_test_tokens.data, y_test),
    epochs=1,  # Reduce the number of epochs
    batch_size=16  # Reduce the batch size
)

# Evaluate the model
loss, accuracy = model.evaluate(X_test_tokens.data, y_test, batch_size=16)  # Use the same reduced batch size
print(f'Loss: {loss}, Accuracy: {accuracy}')

# Save the model
model.save_pretrained('emotion_model')


All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loss: 0.1291562020778656, Accuracy: 0.9327096343040466


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from transformers import BertTokenizer, TFBertForSequenceClassification

# Load the saved BERT model
max_length = 64 
saved_model = TFBertForSequenceClassification.from_pretrained('emotion_model')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and preprocess test data to match the input shape used during training
X_test_tokens = tokenizer(list(X_test), padding=True, truncation=True, max_length=max_length, return_tensors='tf')

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)  # Adjust the learning rate if necessary
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
saved_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# Evaluate the BERT model on the test dataset
loss, accuracy = saved_model.evaluate(X_test_tokens.data, y_test, batch_size=16)  # Use the same batch size

# Make predictions on the test dataset
y_pred = saved_model.predict(X_test_tokens.data)
y_pred_classes = np.argmax(y_pred.logits, axis=1)

# Calculate precision, recall, and F1-score
precision_bert = precision_score(y_test, y_pred_classes, average='weighted')
recall_bert = recall_score(y_test, y_pred_classes, average='weighted')
f1_bert = f1_score(y_test, y_pred_classes, average='weighted')

# Print evaluation metrics
print("BERT Model Evaluation:")
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision_bert:.4f}")
print(f"Recall: {recall_bert:.4f}")
print(f"F1-Score: {f1_bert:.4f}")

# Detailed classification report
report = classification_report(y_test, y_pred_classes, target_names=emotions)
print("\nClassification Report:\n", report)


Some layers from the model checkpoint at emotion_model were not used when initializing TFBertForSequenceClassification: ['dropout_224']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at emotion_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


BERT Model Evaluation:
Loss: 0.1184
Accuracy: 0.9375
Precision: 0.9413
Recall: 0.9375
F1-Score: 0.9374

Classification Report:
               precision    recall  f1-score   support

       anger       0.92      0.97      0.94     11339
        fear       0.88      0.94      0.91      9376
         joy       0.97      0.93      0.95     28247
        love       0.79      0.93      0.86      6853
     sadness       0.98      0.97      0.97     24504
    surprise       0.98      0.65      0.78      3043

    accuracy                           0.94     83362
   macro avg       0.92      0.90      0.90     83362
weighted avg       0.94      0.94      0.94     83362

