**BERT PRETRAINED MODEL**

Using https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment

If you want to just test the model run all blocks except the training part.
There is an evaluation method at the end.

In [1]:
%pip install pandas scikit-learn numpy nltk torch transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, Dataset

**Load in CSV file for training and testing**

80% training 10% testing 10% validation

In [4]:
# Load the data into a Pandas DataFrame
reviews_df = pd.read_csv('bert/tripadvisor_hotel_reviews.csv')

In [5]:
def map_rating_to_sentiment(row):
    rating = row['Rating']
    if rating in [1,2]:
        return "Negative"
    elif rating in [3]:
        return "Neutral"
    elif rating in [4,5]:
        return "Positive"
    return "Unknown"
def map_rating_to_sentiment_score(row):
    rating = row['Rating']
    if rating in [1,2]:
        return 1
    elif rating in [3]:
        return 2
    elif rating in [4,5]:
        return 3
    return -1
reviews_df['Sentiment'] = reviews_df.apply(map_rating_to_sentiment, axis=1)
reviews_df['Sentiment Score'] = reviews_df.apply(map_rating_to_sentiment_score, axis=1)

In [6]:
no_of_reviews = len(reviews_df)
sections = [int(0.8 * no_of_reviews), int(0.9 * no_of_reviews)]

train_df, test_df, val_df = np.split(
    ary = reviews_df,             # Array to split (i.e. our DataFrame of reviews)
    indices_or_sections = sections          # Sections to split (i.e. split at 80% and 90% mark)
)

**Load in BERT Tokenizer**

Set up data preprocessing pipeline for sentiment analysis on the reviews.

In [13]:
# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Define a function to preprocess the text inputs
def preprocess(text):
    # Tokenize the text
    tokenized_text = tokenizer.encode_plus(text, truncation=True, padding=True)

    # Extract the input_ids and attention_mask tensors from the tokenized text
    input_ids = torch.tensor(tokenized_text['input_ids'])
    attention_mask = torch.tensor(tokenized_text['attention_mask'])

    # Return the preprocessed text inputs as a dictionary
    return {'input_ids': input_ids, 'attention_mask': attention_mask}

class TripAdvisorDataset(Dataset):
    def __init__(self, data):
        self.encodings = tokenizer(list(data['Review']), truncation=True, padding=True)
        self.labels = torch.tensor(list(data['Rating'] - 1))

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        return len(self.labels)

# Create data loaders for the training and testing sets
train_dataset = TripAdvisorDataset(train_df)
test_dataset = TripAdvisorDataset(test_df)
val_dataset = TripAdvisorDataset(val_df)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)

**Define BERT model**

Set-up optimizer, learning rate scheduler and device.

In [14]:
from torch.optim import AdamW

# Define the BERT model
model = BertForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment', num_labels=5)

# Define the optimizer and learning rate scheduler
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
num_epochs = 4
num_training_steps = num_epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Set up the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

**Train the model 4 epochs**

In [16]:
# Train the model
import time
start_time = time.time()

model.to(device)
model.train()
for epoch in range(num_epochs):
    train_loss = 0
    for batch in train_loader:
        # Load batch to GPU
        batch = {k: v.to(device) for k, v in batch.items()}
        
        # Zero out gradients from previous step
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(**batch)
        
        # Compute loss
        loss = outputs.loss
        
        # Backward pass
        loss.backward()
        
        # Update parameters and learning rate
        optimizer.step()
        scheduler.step()
        
        # Accumulate training loss
        train_loss += loss.item()
    
    # Evaluate the model on the test set after each epoch
    test_loss = 0
    test_preds = []
    test_labels = []
    
    # Set the model to evaluation mode
    model.eval()
    
    # Disable gradient computation
    with torch.no_grad():
        # Iterate over the test set
        for batch in test_loader:
            # Load batch to GPU
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = model(**batch)

            # Compute loss
            loss = outputs.loss

            # Accumulate test loss
            test_loss += loss.item()

            # Get predictions and convert to numpy arrays
            preds = outputs.logits.argmax(dim=-1).cpu().numpy()
            labels = batch['labels'].cpu().numpy()

            # Save predictions and labels for computing metrics
            test_preds.extend(preds)
            test_labels.extend(labels)
    
    # Compute test metrics
    test_acc = accuracy_score(test_labels, test_preds)
    test_f1 = f1_score(test_labels, test_preds, average='weighted')
    
    # Print training and test metrics for the epoch
    print(f'Epoch {epoch + 1}/{num_epochs}')
    print(f'Training loss: {train_loss / len(train_loader):.3f}')
    print(f'Test loss: {test_loss / len(test_loader):.3f}')
    print(f'Test accuracy: {test_acc:.3f}')
    print(f'Test F1 score: {test_f1:.3f}')
    
    # Set the model back to training mode
    model.train()
    
    
end_time = time.time()
elapsed_time = end_time - start_time

print(f"Elapsed time: {elapsed_time:.2f} seconds")

Epoch 1/4
Training loss: 1.009
Test loss: 0.850
Test accuracy: 0.626
Test F1 score: 0.616
Epoch 2/4
Training loss: 0.829
Test loss: 0.789
Test accuracy: 0.661
Test F1 score: 0.647
Epoch 3/4
Training loss: 0.698
Test loss: 0.793
Test accuracy: 0.672
Test F1 score: 0.673
Epoch 4/4
Training loss: 0.573
Test loss: 0.818
Test accuracy: 0.664
Test F1 score: 0.663
Elapsed time: 3710.12 seconds


**Save the model to disk**

In [17]:
model_save_path = './shitbert_standardised.pt'
torch.save(model.state_dict(), model_save_path)

**Evaluate Model**

In [18]:
# Load the saved state dict
model.load_state_dict(torch.load('./shitbert_standardised.pt'))
model.to(device)

# Set the model to evaluation mode
start_time = time.time()
model.eval()

# Initialize empty lists to store predicted and actual labels
y_pred = []
y_true = []

# Disable gradient computation
with torch.no_grad():
    # Iterate over the val set
    for batch in val_loader:
        # Load batch to GPU
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        outputs = model(**batch)

        # Get predictions and convert to numpy arrays
        preds = outputs.logits.argmax(dim=-1).cpu().numpy()
        labels = batch['labels'].cpu().numpy()

        # Save predictions and labels for computing metrics
        y_pred.extend(preds)
        y_true.extend(labels)

# Print 1-5 evaluation
target_names = ['1', '2', '3', '4', '5']
print(classification_report(y_true, y_pred, target_names=target_names))        

# Print Negative Neutral Positive evaluation
label_map_reverse = {0: 'Negative', 1: 'Negative', 2: 'Neutral', 3: 'Positive', 4: 'Positive'}
y_true = [label_map_reverse[val] for val in y_true]
y_pred = [label_map_reverse[val] for val in y_pred]

print("\n\n\n\n")
# Compute classification report
target_names = ['Negative', 'Neutral', 'Positive']
print(classification_report(y_true, y_pred, target_names=target_names))

end_time = time.time()

# Compute the elapsed time
elapsed_time = end_time - start_time

# Print the elapsed time
print(f"Elapsed time: {elapsed_time:.2f} seconds")

              precision    recall  f1-score   support

           1       0.69      0.65      0.67       114
           2       0.47      0.41      0.44       166
           3       0.44      0.41      0.42       204
           4       0.54      0.54      0.54       585
           5       0.78      0.81      0.79       981

    accuracy                           0.65      2050
   macro avg       0.58      0.56      0.57      2050
weighted avg       0.64      0.65      0.65      2050






              precision    recall  f1-score   support

    Negative       0.80      0.72      0.76       280
     Neutral       0.44      0.41      0.42       204
    Positive       0.93      0.96      0.94      1566

    accuracy                           0.87      2050
   macro avg       0.72      0.70      0.71      2050
weighted avg       0.87      0.87      0.87      2050

Elapsed time: 35.27 seconds


In [19]:
from IPython.display import Audio
sound_file = 'notification2.mp3'
display(Audio(sound_file, autoplay=True))

In [20]:
from IPython.display import Audio
sound_file = 'notification.mp3'
display(Audio(sound_file, autoplay=True))