In [1]:
import pandas as pd
from transformers import BertTokenizer, TFBertModel
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Read 'dvd.csv' (path is relative to the notebook's working directory) into a DataFrame
csv_path = "dvd.csv"
df = pd.read_csv(csv_path)


In [None]:
# Build the modelling frame: "review_body" is the input text and "star_rating"
# becomes a binary sentiment label (1 = rating above 3, 0 = rating of 3 or below).
# NOTE: this cell overwrites `df`; re-running it without re-loading the CSV would
# binarize the already-binary labels and turn every label into 0.
df = (
    df[['star_rating', 'review_body']]
    # Ratings parsed as text are coerced to numbers; unparseable values become
    # NaN so the dropna() below discards them together with the missing rows.
    .assign(star_rating=lambda frame: pd.to_numeric(frame['star_rating'], errors='coerce'))
    .dropna()
    .assign(
        star_rating=lambda frame: (frame['star_rating'] > 3).astype(int),
        # Downstream tokenization expects plain strings, so normalise any
        # review bodies that were parsed as another type.
        review_body=lambda frame: frame['review_body'].astype(str),
    )
)


In [None]:
# Hold out 20% of the reviews for testing; the fixed random_state keeps the split reproducible
split = train_test_split(
    df['review_body'],
    df['star_rating'],
    test_size=0.2,
    random_state=42,
)
train_texts, test_texts, train_labels, test_labels = split


In [None]:
# Tokenize the reviews with the pretrained 'bert-base-uncased' tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
max_length = 128


def encode_texts(texts):
    """Tokenize ``texts`` into fixed-width TensorFlow tensors.

    Every sequence is truncated or padded to exactly ``max_length`` tokens.
    Padding to ``'max_length'`` (rather than to the longest sequence present)
    guarantees that the train and test encodings share the same width and
    that this width equals the ``(max_length,)`` input shape the model declares.

    Args:
        texts: Iterable of review strings.

    Returns:
        The tokenizer output, containing 'input_ids' and 'attention_mask'
        tensors with ``max_length`` columns.
    """
    return tokenizer(
        list(texts),
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors="tf",
    )


train_encodings = encode_texts(train_texts)
test_encodings = encode_texts(test_texts)


In [None]:
# Wrap the encoded inputs and their labels in batched tf.data pipelines.
# Each element is (features dict, label); the dict keys are the tokenizer fields below.
feature_keys = ('input_ids', 'attention_mask')

train_features = {key: train_encodings[key] for key in feature_keys}
train_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels.values))
# Only the training stream is shuffled (1000-element buffer); both streams use batches of 32
train_dataset = train_dataset.shuffle(1000).batch(32)

test_features = {key: test_encodings[key] for key in feature_keys}
test_dataset = tf.data.Dataset.from_tensor_slices((test_features, test_labels.values))
test_dataset = test_dataset.batch(32)


In [None]:
# Pretrained BERT encoder used as the first stage of the classifier
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# Symbolic inputs of width max_length, named 'input_ids' and 'attention_mask'
input_ids = Input(name='input_ids', shape=(max_length,), dtype=tf.int32)
attention_mask = Input(name='attention_mask', shape=(max_length,), dtype=tf.int32)

# Take BERT's `last_hidden_state` output as the sequence fed to the recurrent layer
token_states = bert_model(input_ids, attention_mask=attention_mask).last_hidden_state

# The BiLSTM returns a single vector per example (return_sequences=False);
# dropout and one sigmoid unit then produce the probability of label 1
sequence_summary = Bidirectional(LSTM(units=128, return_sequences=False))(token_states)
regularised = Dropout(0.3)(sequence_summary)
output = Dense(1, activation='sigmoid')(regularised)

model = Model(inputs=[input_ids, attention_mask], outputs=output)


In [None]:
# Configure training: Adam with a 2e-5 learning rate, binary cross-entropy loss, accuracy metric
learning_rate = 2e-5
optimizer = Adam(learning_rate=learning_rate)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [None]:
# Fit for three epochs, using the held-out test batches as validation data
num_epochs = 3
history = model.fit(train_dataset, epochs=num_epochs, validation_data=test_dataset)


In [None]:
# Persist the trained network and its tokenizer to separate output paths
model_path = 'bert_bilstm_sentiment_model'
tokenizer_path = 'bert_bilstm_tokenizer'
model.save(model_path)
tokenizer.save_pretrained(tokenizer_path)
