In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import pandas as pd
import sklearn
import warnings
import transformers
import tensorflow as ts
import numpy as np
import seaborn as sns
import tensorflow as tf 

In [None]:
warnings.filterwarnings('ignore')

In [None]:
# Load the labelled review dataset from the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="data.csv")

Unnamed: 0,review,model,sentiment
0,The movie was a complete snoozefest. I regret...,Gemini-1.5-Flash,negative
1,"Seriously, the worst venue I've ever been to. ...",Gemini-1.5-Flash,negative
2,"This event was so poorly organized, it was a j...",Gemini-1.5-Flash,negative
3,What a disaster of a movie! The acting was aw...,Gemini-1.5-Flash,negative
4,I wouldn't recommend this venue to my worst en...,Gemini-1.5-Flash,negative


In [None]:
# Preview the first five rows to confirm the file parsed as expected.
data.head(n=5)

In [None]:
# The `model` column is not used by any later step, so remove it.
# Rebinding `data` (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: the in-place version raised KeyError when the cell was run twice.
data = data.drop(columns=['model'], errors='ignore')

In [None]:
# Preview the last five rows (useful for spotting trailing junk or unlabeled rows).
data.tail(n=5)

In [None]:
# Count the rows whose sentiment label is absent.
missing_labels = data["sentiment"].isna().sum()

In [None]:
# Report how many rows have no sentiment label.
print(f'number of missing labels: {missing_labels}')

In [None]:
# Collect the unlabeled rows so they can be inspected before being dropped.
missing_rows = data.loc[data['sentiment'].isna()]

In [None]:
# Show the rows that have no sentiment label (plain-text rendering of the frame).
print(missing_rows)

In [None]:
# Rows without a label cannot be used for supervised training, and rows without
# review text cannot be tokenized (the tokenizer expects strings), so drop both.
# The index is renumbered afterwards so positions are contiguous again.
# Rebinding instead of inplace=True keeps the step explicit and safe to re-run.
data = data.dropna(subset=['sentiment', 'review']).reset_index(drop=True)

In [None]:
# Sanity check: number of rows still lacking a sentiment label.
print(data['sentiment'].isna().sum())

In [None]:
# Sanity check: number of rows whose review text is missing.
print(data['review'].isna().sum())

In [None]:
# Replace the sentiment labels with integer class ids.
# `le` is kept because later cells read `le.classes_` to recover the class names.
# NOTE: the column is overwritten, so re-running this cell re-fits the encoder on the
# integer ids rather than on the original labels.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
data = data.assign(sentiment=le.fit_transform(data['sentiment']))

In [None]:
# Spot-check five random rows after label encoding (unseeded, so rows vary per run).
data.sample(n=5)

In [None]:
# Count the tokens the BERT tokenizer produces for each review.
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
length = data['review'].map(lambda review_text: len(tokenizer.tokenize(review_text)))

In [None]:
# Summary statistics (count, mean, quartiles, max) of the per-review token counts.
print(length.describe())

In [None]:
# Bucket the reviews by token count. The buckets are disjoint and together cover
# every possible length, so the four counts sum to len(length).
# Fixes: "empty" previously counted reviews with exactly one token (and zero-length
# reviews fell into no bucket), and the range labels did not match their conditions.
print("empty reviews: ", (length == 0).sum())
print("1-24 tokens: ", ((length >= 1) & (length <= 24)).sum())
print("25-64 tokens: ", ((length > 24) & (length <= 64)).sum())
print(">64 tokens: ", (length > 64).sum())

In [None]:
# Encode every review in a single batched call; each sequence is truncated or padded
# to exactly 64 token ids so the batch forms a rectangular tensor.
encoder = tokenizer(
    data['review'].tolist(),
    padding="max_length",
    max_length=64,
    truncation=True,
    return_tensors="tf",  # return TensorFlow tensors rather than Python lists
)

In [None]:
# Integer class ids as a NumPy array, aligned row-for-row with the tokenized reviews.
labels = data['sentiment'].to_numpy()

In [None]:
# Quick look at the encoded label array.
print(labels)

In [None]:
# Pull the token ids and attention masks out of the encoding as NumPy arrays so they
# can be split with scikit-learn's train_test_split.
input_ids, attention_mask = (
    encoder[field].numpy() for field in ("input_ids", "attention_mask")
)

In [None]:
# Raw review text and encoded labels as arrays.
# NOTE(review): not referenced by the cells visible in this notebook; the BERT pipeline
# uses the tokenized arrays instead.
x, y = data['review'].to_numpy(), data['sentiment'].to_numpy()

In [None]:
# List the fields the tokenizer returned.
print(encoder.keys())

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the examples for testing. The three arrays are split together so
# ids, masks and labels stay aligned row-for-row; the fixed seed makes the split repeatable.
(
    x_train_ids, x_test_ids,
    x_train_mask, x_test_mask,
    y_train, y_test,
) = train_test_split(
    input_ids, attention_mask, labels,
    shuffle=True,
    random_state=42,
    test_size=0.2,
)

In [None]:
x_train_ids.shape   # (number of training examples, tokens per example)

In [None]:
# Total training steps = (training examples // batch_size) * epochs.
# The example count is read from the training split instead of being hard-coded, so the
# value stays correct when the dataset or the split ratio changes.
num_train_steps = (len(x_train_ids) // 16) * 3    # batch_size=16, epochs=3

In [None]:
# The id and mask arrays must have identical shapes; print both, one per line.
print(x_train_ids.shape, x_train_mask.shape, sep="\n")

In [None]:
import tensorflow.keras as keras
from transformers import TFBertForSequenceClassification

# Model #1: pre-trained BERT with a sequence-classification head sized to the number
# of classes the label encoder found.
model = TFBertForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=len(le.classes_)
)

# Labels are integer class ids and the model emits raw logits, hence the sparse
# categorical cross-entropy with from_logits=True.
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=3e-5)

model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation loss has gone two epochs without improving, and
# roll the model back to the weights from its best epoch (guards against overfitting).
earlystop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

In [None]:
# Fine-tune BERT; `bert` holds the Keras History object returned by fit().
# Labels are passed as the separate `y` argument rather than inside the feature dict,
# so the compiled Keras loss receives them through the standard fit() path and the
# training inputs have the same structure as the validation inputs.
# NOTE(review): the held-out test split doubles as validation data here, so early
# stopping is tuned on the same data that is later reported as the test score;
# consider carving out a dedicated validation split.
bert = model.fit(
    {'input_ids': x_train_ids, 'attention_mask': x_train_mask},
    y_train,
    validation_data=({'input_ids': x_test_ids, 'attention_mask': x_test_mask}, y_test),
    epochs=3,
    batch_size=16,
    callbacks=[earlystop],
)

In [None]:
# Persist the fine-tuned model and its tokenizer into the same directory so both can
# be reloaded together later.

model.save_pretrained("my_tf_bert_model")
tokenizer.save_pretrained("my_tf_bert_model")

In [None]:
# Evaluate on the held-out split.
# Inputs are passed by name (same dict structure used for training) rather than as a
# positional list, so the call cannot silently depend on the model's argument order.
# NOTE: `loss` is rebound here from the loss-function object to the scalar test loss.
loss, accuracy = model.evaluate(
    {'input_ids': x_test_ids, 'attention_mask': x_test_mask},
    y_test,
    batch_size=16,
)

In [None]:
# Report the held-out loss and accuracy, rounded to four decimals.
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

In [None]:
# Classify one hand-written sentence, encoded with the same length, padding and
# truncation settings used for the training data.
text = "The movie was amazing!"
inputs = tokenizer(
    text,
    max_length=64,
    padding="max_length",
    truncation=True,
    return_tensors="tf",
)

# Forward pass, then take the index of the largest logit for the single example.
outputs = model(inputs)
pred = tf.math.argmax(outputs.logits, axis=-1).numpy()[0]

print("Predicted class:", pred)

In [None]:
# Predict on the held-out split and print per-class precision / recall / F1.
from sklearn.metrics import classification_report, confusion_matrix

# Inputs are passed by name (same dict structure used for training) instead of a
# positional list, so the call does not depend on the model's argument order.
# NOTE: despite its name, `y_pred_probs` holds raw logits, not probabilities;
# argmax gives the same class either way.
y_pred_probs = model.predict(
    {'input_ids': x_test_ids, 'attention_mask': x_test_mask}
).logits
y_pred = np.argmax(y_pred_probs, axis=1)

print(classification_report(y_test, y_pred))

In [None]:
# Show which integer id the encoder assigned to each original class label.
label_mapping = {
    class_name: class_id
    for class_name, class_id in zip(le.classes_, le.transform(le.classes_))
}
print(label_mapping)

In [None]:
# Display names for plots and reports, derived from the fitted encoder so their order
# and count always match the integer class ids (rather than a hand-typed list that
# must be kept in sync with the data).
label = [str(class_name).capitalize() for class_name in le.classes_]

In [None]:
# Confusion-matrix heatmap: rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",          # integer counts in each cell
    cmap="Greens",
    xticklabels=label,
    yticklabels=label,
    ax=ax,
)
ax.set_ylabel("Actual")
ax.set_xlabel("Predicted")
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Same report as before, with readable class names in place of the integer ids.
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=label))

In [None]:
# Per-class metrics as a nested dict (one entry per class plus the summary entries).
report = classification_report(y_test, y_pred, target_names=label, output_dict=True)

# After transposing, each row is a class or summary entry and each column a metric.
df = pd.DataFrame(report).T

# Keep the three headline metrics; the last three rows (accuracy and the averages)
# are summaries rather than classes, so slice them off.
df_metrics = df.loc[:, ['precision', 'recall', 'f1-score']].iloc[:-3]

# Grouped bar chart: one group per class, one bar per metric.
ax = df_metrics.plot.bar(figsize=(8, 6), rot=0)
ax.set_title("Classification Report Metrics")
ax.set_ylabel("Score")
ax.set_ylim(0, 1.05)  # scores lie in [0, 1]; leave a little headroom
ax.legend(loc='lower right')
plt.show()

In [None]:
from sklearn.metrics import f1_score

# F1 under three averaging schemes, computed from the same test-set predictions.
f1_macro, f1_micro, f1_weighted = (
    f1_score(y_test, y_pred, average=scheme)
    for scheme in ("macro", "micro", "weighted")
)

In [None]:
# Summarize the three F1 variants for the BERT model, rounded to four decimals.
print("BERT Model F1 Scores:")
print(f"Micro F1: {f1_micro:.4f}")
print(f"Macro F1: {f1_macro:.4f}")
print(f"Weighted F1: {f1_weighted:.4f}")