In [14]:
pip install tensorflow transformers tf-keras


Collecting tf-keras
  Downloading tf_keras-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Downloading tf_keras-2.18.0-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.7 MB 3.4 MB/s eta 0:00:01
   ---------- ----------------------------- 0.5/1.7 MB 7.4 MB/s eta 0:00:01
   ----------------------- ---------------- 1.0/1.7 MB 9.0 MB/s eta 0:00:01
   -------------------------- ------------- 1.1/1.7 MB 7.3 MB/s eta 0:00:01
   ---------------------------- ----------- 1.2/1.7 MB 6.5 MB/s eta 0:00:01
   ----------------------------- ---------- 1.3/1.7 MB 5.4 MB/s eta 0:00:01
   -------------------------------- ------- 1.4/1.7 MB 5.2 MB/s eta 0:00:01
   ---------------------------------------  1.7/1.7 MB 5.5 MB/s eta 0:00:01
   ---------------------------------------- 1.7/1.7 MB 5.0 MB/s eta 0:00:00
Installing collected packages: tf-keras
Successfully installed tf-keras-2.18.0


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm




In [16]:
# Dataset locations
file_path = 'DVD11.csv'  # Replace with your training dataset path
test_file_path = 'Books11.csv'  # Replace with your test dataset path

# The encoder is fitted on the training sentiments and reused for the test split.
le = LabelEncoder()

# Training split: copy the star rating into `sentiment`, then fit + encode it.
train_data = pd.read_csv(file_path)
train_data['sentiment'] = train_data['star_rating']
train_data['sentiment_encoded'] = le.fit_transform(train_data['sentiment'])

# Test split: same column copy, encoded with the already-fitted encoder.
test_data = pd.read_csv(test_file_path)
test_data['sentiment'] = test_data['star_rating']
test_data['sentiment_encoded'] = le.transform(test_data['sentiment'])

# BERT tokenizer used to turn review text into model inputs
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_data(texts, tokenizer, max_length):
    """Tokenize a collection of texts into padded, truncated PyTorch tensors.

    Args:
        texts: Iterable of review texts (a pandas Series, list, array, ...).
            Missing entries (None / NaN / pd.NA) are replaced by the empty
            string and any other non-string value is converted with ``str``,
            so the tokenizer only ever receives strings.
        tokenizer: Callable tokenizer; it is invoked with the list of strings
            plus ``padding``, ``truncation``, ``max_length`` and
            ``return_tensors`` keyword arguments.
        max_length: Maximum sequence length passed to the tokenizer.

    Returns:
        Whatever ``tokenizer`` returns for the cleaned batch (requested as
        PyTorch tensors via ``return_tensors='pt'``).
    """
    # Missing reviews show up as float NaN after pd.read_csv; handing those to
    # the tokenizer raises, so normalise every element to a plain string first.
    cleaned_texts = ["" if pd.isna(text) else str(text) for text in texts]
    return tokenizer(
        cleaned_texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )

MAX_LENGTH = 128

# Tokenize the review text of each split (training first, then test).
train_encodings, test_encodings = (
    tokenize_data(split['review_body'], tokenizer, MAX_LENGTH)
    for split in (train_data, test_data)
)

# Encoded sentiment labels as tensors, in the same split order.
train_labels, test_labels = (
    torch.tensor(split['sentiment_encoded'].values)
    for split in (train_data, test_data)
)


In [17]:
class SentimentDataset(Dataset):
    """Dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to an indexable container holding
            one entry per sample.
        labels: Indexable, sized container with one label per sample.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict with every encoding field at ``idx`` plus a 'labels' entry."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample['labels'] = self.labels[idx]
        return sample

# Wrap each split's encodings and label tensor in a SentimentDataset.
train_dataset = SentimentDataset(encodings=train_encodings, labels=train_labels)
test_dataset = SentimentDataset(encodings=test_encodings, labels=test_labels)


In [18]:
class BertBiLSTMClassifier(nn.Module):
    """BERT encoder (used without gradients) + BiLSTM + linear classifier.

    The BERT forward pass runs under ``torch.no_grad()``, so only the LSTM and
    the linear head receive gradients. The LSTM outputs are mean-pooled over
    the *real* tokens of each sequence (as indicated by ``attention_mask``)
    before classification.

    Args:
        n_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, n_classes):
        super(BertBiLSTMClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # 768 matches the hidden size of the encoder output fed to the LSTM.
        self.lstm = nn.LSTM(input_size=768, hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)
        # Bidirectional LSTM -> forward and backward states are concatenated.
        self.fc = nn.Linear(128 * 2, n_classes)

    def train(self, mode=True):
        """Switch train/eval mode while always keeping BERT in eval mode.

        BERT is only used as a gradient-free feature extractor in ``forward``;
        leaving it in training mode would enable its dropout layers and inject
        random noise into features that are never trained.
        """
        super().train(mode)
        self.bert.eval()
        return self

    def forward(self, input_ids, attention_mask):
        """Compute class logits for a batch.

        Args:
            input_ids: Token id tensor passed to BERT.
            attention_mask: Tensor with 1 for real tokens and 0 for padding,
                same batch/sequence layout as ``input_ids``.

        Returns:
            Tensor of logits with one row per sequence and ``n_classes`` columns.
        """
        with torch.no_grad():
            outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        lstm_out, _ = self.lstm(outputs.last_hidden_state)

        # Masked mean: a plain mean over the sequence axis would also average
        # the LSTM states at padded positions, making the pooled vector depend
        # on how much padding a sequence happens to have.
        mask = attention_mask.unsqueeze(-1).to(lstm_out.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1.0)  # avoid division by zero
        avg_pool = (lstm_out * mask).sum(dim=1) / token_counts

        logits = self.fc(avg_pool)
        return logits




In [19]:
def train_model(model, train_loader, optimizer, criterion, device, epochs=3):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Each batch is a mapping with 'input_ids', 'attention_mask' and 'labels'
    entries; they are moved to ``device`` before the forward pass. After every
    epoch the mean per-batch loss and the training accuracy are printed.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning logits.
        train_loader: Sized iterable of batches.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batch tensors are moved to.
        epochs: Number of passes over the data.
    """
    for epoch_idx in range(epochs):
        model.train()
        running_loss = 0
        n_correct = 0
        n_seen = 0

        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            ids, mask, targets = (
                batch[field].to(device)
                for field in ('input_ids', 'attention_mask', 'labels')
            )

            # Forward pass and loss
            logits = model(ids, mask)
            batch_loss = criterion(logits, targets)
            running_loss += batch_loss.item()

            # Backpropagate and update the weights
            batch_loss.backward()
            optimizer.step()

            # Bookkeeping for the epoch-level training accuracy
            hits = logits.argmax(dim=1).eq(targets)
            n_correct += hits.sum().item()
            n_seen += targets.size(0)

        # Loss is averaged over batches, accuracy over individual samples.
        mean_loss = running_loss / len(train_loader)
        accuracy = n_correct / n_seen

        print(f"Epoch {epoch_idx + 1}/{epochs}")
        print(f"Training Loss: {mean_loss:.3f}, Training Accuracy: {accuracy:.3f}")


In [21]:
def evaluate_model(model, test_loader, device, label_names=None):
    """Evaluate ``model`` on ``test_loader`` and print classification metrics.

    Prints accuracy and weighted F1 / precision / recall, followed by a
    per-class classification report.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning logits.
        test_loader: Iterable of batches with 'input_ids', 'attention_mask'
            and 'labels' entries.
        device: Device the batch tensors are moved to.
        label_names: Optional sequence of class names, where position ``i``
            names encoded label ``i``. Defaults to the classes of the
            module-level label encoder ``le``.
    """
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            preds = torch.argmax(outputs, dim=1)
            predictions.extend(preds.cpu().tolist())
            true_labels.extend(labels.cpu().tolist())

    # Calculate metrics
    accuracy = accuracy_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions, average='weighted')
    precision = precision_score(true_labels, predictions, average='weighted')
    recall = recall_score(true_labels, predictions, average='weighted')

    # Print metrics
    print("Evaluation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")

    # Classification Report
    if label_names is None:
        label_names = le.classes_
    # The report needs string names; encoder classes built from numeric
    # ratings are not strings, so convert them explicitly.
    target_names = [str(name) for name in label_names]
    # Pass the full list of encoded label ids so the report still lines up
    # with target_names when some class never occurs in this split.
    label_ids = list(range(len(target_names)))

    print("\nClassification Report:")
    print(classification_report(true_labels, predictions, labels=label_ids, target_names=target_names))


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Exception encountered when calling layer 'tf_bert_model_1' (type TFBertModel).

Data of type <class 'keras.src.backend.common.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for input_ids.

Call arguments received by layer 'tf_bert_model_1' (type TFBertModel):
  • input_ids={'input_ids': '<KerasTensor shape=(None, 128), dtype=int32, sparse=False, name=input_ids>', 'attention_mask': '<KerasTensor shape=(None, 128), dtype=int32, sparse=False, name=attention_mask>'}
  • attention_mask=None
  • token_type_ids=None
  • position_ids=None
  • head_mask=None
  • inputs_embeds=None
  • encoder_hidden_states=None
  • encoder_attention_mask=None
  • past_key_values=None
  • use_cache=None
  • output_attentions=None
  • output_hidden_states=None
  • return_dict=None
  • training=False

In [None]:
# Hyperparameters
BATCH_SIZE = 16
EPOCHS = 8
LEARNING_RATE = 1e-5
SEED = 42
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch before anything random happens (shuffling of the training
# loader, initialisation of the LSTM / linear layers) so that re-running the
# notebook reproduces the same training run.
torch.manual_seed(SEED)

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize model, optimizer, and loss function
model = BertBiLSTMClassifier(n_classes=len(le.classes_)).to(DEVICE)
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

# Train the model
train_model(model, train_loader, optimizer, criterion, DEVICE, epochs=EPOCHS)

# Evaluate the model
evaluate_model(model, test_loader, DEVICE)
