In [6]:
!pip install transformers datasets
# Importing the necessary libraries with customized names
import pandas as review_pandas
import numpy as review_numpy
import torch as review_torch
from sklearn.model_selection import train_test_split as review_split
from sklearn.metrics import classification_report as review_report, confusion_matrix as review_matrix
import matplotlib.pyplot as review_plt
import seaborn as review_sns
from transformers import BertTokenizer as ReviewBertTokenizer, BertForSequenceClassification as ReviewBertModel, Trainer as ReviewTrainer, TrainingArguments as ReviewTrainingArgs
from datasets import Dataset as ReviewDataset, DatasetDict as ReviewDatasetDict

# Mount Google Drive (Colab only) so files under /content/drive can be read;
# the dataset itself is loaded further down, not here.
from google.colab import drive
drive.mount('/content/drive')
def load_and_preprocess_review_data(directory_file):
    """
    Load the review CSV and derive binary sentiment labels.

    Rows whose ``overall`` rating is missing/non-numeric or whose ``reviewText``
    is missing are dropped, so that they are neither silently labelled as
    positive nor turned into placeholder text later on. Ratings below 4 map to
    negative (0); ratings of 4 and above map to positive (1).

    Args:
        directory_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame with exactly two columns, ``content`` (review text as str)
        and ``sentiment`` (int 0 or 1), indexed 0..n-1.

    Raises:
        KeyError: If the ``overall`` or ``reviewText`` column is absent.
        pandas.errors.ParserError: Propagated unchanged from ``read_csv``.
    """
    raw_reviews = review_pandas.read_csv(directory_file)

    required_columns = ['overall', 'reviewText']
    missing_columns = [name for name in required_columns if name not in raw_reviews.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s): {missing_columns}")

    # Coerce so that string or garbage ratings become NaN instead of "not in [1, 2, 3]" -> positive.
    ratings = review_pandas.to_numeric(raw_reviews['overall'], errors='coerce')
    usable_rows = ratings.notna() & raw_reviews['reviewText'].notna()

    review_data = review_pandas.DataFrame({
        'content': raw_reviews.loc[usable_rows, 'reviewText'].astype(str),
        # 1, 2, 3 -> negative (0) and 4, 5 -> positive (1)
        'sentiment': (ratings[usable_rows] >= 4).astype(int),
    })

    # Dropping rows leaves gaps in the index; hand back a clean 0..n-1 index.
    return review_data.reset_index(drop=True)


def tokenize_review_data(review_dataset, review_tokenizer, max_length=128):
    """
    Tokenize the ``content`` column and rename ``sentiment`` to ``labels``.

    Args:
        review_dataset: Object exposing ``map(fn, batched=True)`` and
            ``rename_column(old, new)`` (a Hugging Face ``Dataset`` or
            ``DatasetDict``) with ``content`` and ``sentiment`` columns.
        review_tokenizer: Callable tokenizer applied to a list of strings.
        max_length: Sequence length every review is truncated/padded to.
            Defaults to 128.

    Returns:
        The mapped dataset with the tokenizer's output columns added and the
        ``sentiment`` column renamed to ``labels``.
    """
    def _as_text(value):
        # Missing reviews become an empty string rather than the literal "None"/"nan".
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value)

    def tokenize_review_function(examples):
        texts = [_as_text(text) for text in examples['content']]

        # No return_tensors here: ``map`` stores plain lists, so building
        # framework tensors per batch would only be converted straight back.
        return review_tokenizer(
            texts,
            max_length=max_length,
            truncation=True,
            padding='max_length',
        )

    tokenize_dataset = review_dataset.map(tokenize_review_function, batched=True)
    # The model's forward pass expects the target column to be called 'labels'.
    return tokenize_dataset.rename_column('sentiment', 'labels')


def fine_tune_review_bert_model(tokenize_datasets, output_dir='./results', model_name='bert-base-uncased',
                                num_train_epochs=5, learning_rate=1e-5, batch_size=16):
    """
    Fine-tune a BERT sequence classifier on the tokenized review splits.

    Trains with per-epoch evaluation and checkpointing, reloads the checkpoint
    with the best evaluation accuracy at the end, then prints the final
    evaluation metrics.

    Args:
        tokenize_datasets: Mapping with 'train' and 'test' entries holding the
            tokenized splits (with a 'labels' column).
        output_dir: Directory for checkpoints. Defaults to './results'.
        model_name: Pretrained checkpoint used for both the tokenizer and the
            classifier. It should match the tokenizer the datasets were
            tokenized with. Defaults to 'bert-base-uncased'.
        num_train_epochs: Number of training epochs. Defaults to 5.
        learning_rate: Optimizer learning rate. Defaults to 1e-5.
        batch_size: Per-device batch size for both training and evaluation.
            Defaults to 16.

    Returns:
        Tuple ``(trainer, model, tokenizer)``.
    """
    def accuracy_metric(eval_prediction):
        # Fraction of rows whose highest-scoring class equals the reference label.
        predicted_labels = review_numpy.argmax(eval_prediction.predictions, axis=1)
        return {"accuracy": (predicted_labels == eval_prediction.label_ids).mean()}

    review_tokenizer = ReviewBertTokenizer.from_pretrained(model_name)
    bert_sentiment_model = ReviewBertModel.from_pretrained(model_name, num_labels=2)

    # NOTE(review): newer transformers releases rename `evaluation_strategy` to
    # `eval_strategy` and the Trainer's `tokenizer` to `processing_class` --
    # confirm against the installed version before changing either keyword.
    review_args_train = ReviewTrainingArgs(
        output_dir=output_dir,
        logging_dir='./logs',
        num_train_epochs=num_train_epochs,
        learning_rate=learning_rate,
        weight_decay=0.01,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        evaluation_strategy="epoch",
        save_strategy="epoch",  # must match the evaluation cadence for load_best_model_at_end
        logging_strategy="steps",
        logging_steps=10,
        metric_for_best_model="accuracy",
        load_best_model_at_end=True,
        report_to="none",
    )

    review_trainer = ReviewTrainer(
        model=bert_sentiment_model,
        args=review_args_train,
        train_dataset=tokenize_datasets['train'],
        eval_dataset=tokenize_datasets['test'],
        tokenizer=review_tokenizer,
        compute_metrics=accuracy_metric,
    )

    # Training
    review_trainer.train()

    # Final evaluation on the 'test' split
    results_bert = review_trainer.evaluate()
    print("Results:")
    print(results_bert)

    return review_trainer, bert_sentiment_model, review_tokenizer


# Load the reviews and derive the binary sentiment labels
review_data = load_and_preprocess_review_data('/content/drive/My Drive/amazon_reviews.csv')

# Stratified 80/20 split so both splits keep the same positive/negative ratio
training_data, testing_data = review_split(
    review_data,
    test_size=0.2,
    random_state=42,
    stratify=review_data['sentiment'],
)

# Convert pandas DataFrames to Hugging Face Datasets.
# preserve_index=False stops the shuffled pandas index from being stored as an
# extra '__index_level_0__' column next to 'content' and 'sentiment'.
training_dataset = ReviewDataset.from_pandas(training_data, preserve_index=False)
testing_dataset = ReviewDataset.from_pandas(testing_data, preserve_index=False)
review_dataset = ReviewDatasetDict({'train': training_dataset, 'test': testing_dataset})


# Initialize the pre-trained BERT tokenizer for text processing
review_tokenizer = ReviewBertTokenizer.from_pretrained('bert-base-uncased')

# Apply tokenization to preprocess the text data for the model
tokenized_review_datasets = tokenize_review_data(review_dataset, review_tokenizer)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Map:   0%|          | 0/3932 [00:00<?, ? examples/s]

Map:   0%|          | 0/983 [00:00<?, ? examples/s]

In [7]:
# Fine-tune the BERT model on the tokenized train/test splits.
# The call returns (trainer, model, tokenizer); note that `review_tokenizer`
# is rebound here to the tokenizer instance created inside the function.
review_trainer, review_model, review_tokenizer = fine_tune_review_bert_model(tokenized_review_datasets)

def evaluate_review_bert_model(review_trainer, tokenized_datasets, original_texts, num_examples=5):
    """
    Evaluate the fine-tuned Review BERT model on the 'test' split.

    Prints a classification report followed by a few sample predictions
    alongside their review texts.

    Args:
        review_trainer: Object whose ``predict(dataset)`` returns something
            with ``predictions`` (per-class scores) and ``label_ids``.
        tokenized_datasets: Mapping with a 'test' entry to predict on.
        original_texts: Review texts, indexable by position; assumed to be in
            the same row order as ``tokenized_datasets['test']``.
        num_examples: Maximum number of sample predictions to print. Fewer are
            printed when the test split is smaller. Defaults to 5.

    Returns:
        None. All results are printed.
    """
    # Make predictions on the test set
    review_predictions = review_trainer.predict(tokenized_datasets['test'])

    y_true = review_predictions.label_ids
    y_pred = review_numpy.argmax(review_predictions.predictions, axis=1)

    print("Performance Metrics:")
    # Passing labels explicitly keeps target_names valid even when a class is
    # absent from this split (otherwise the report raises on a size mismatch).
    print(review_report(y_true, y_pred, labels=[0, 1], target_names=["Negative", "Positive"]))

    def describe(label):
        return 'Good Comment' if label == 1 else 'Bad Comment'

    # Never index past the shortest of the available sequences.
    shown = max(0, min(num_examples, len(original_texts), len(y_true)))

    print("\nSample Predictions:")
    for i in range(shown):
        print(f"Review Text: {original_texts[i]}")
        print(f"Expected Outcome: {describe(y_true[i])}")
        print(f"Model Prediction: {describe(y_pred[i])}")
        print("-" * 41)

# Raw review texts of the test split, read from the untokenized dataset.
# Assumes the row order matches tokenized_review_datasets['test'] -- TODO confirm.
original_texts = testing_dataset['content']

# Print the classification report and sample predictions for the test split
evaluate_review_bert_model(review_trainer, tokenized_review_datasets, original_texts)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  review_trainer = ReviewTrainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1205,0.150768,0.948118
2,0.0921,0.162038,0.953204
3,0.1183,0.205562,0.955239
4,0.0587,0.169582,0.959308
5,0.0718,0.177553,0.961343


Results:
{'eval_loss': 0.17755328118801117, 'eval_accuracy': 0.9613428280773143, 'eval_runtime': 6.7728, 'eval_samples_per_second': 145.139, 'eval_steps_per_second': 9.154, 'epoch': 5.0}
Performance Metrics:
              precision    recall  f1-score   support

    Negative       0.86      0.71      0.78        93
    Positive       0.97      0.99      0.98       890

    accuracy                           0.96       983
   macro avg       0.91      0.85      0.88       983
weighted avg       0.96      0.96      0.96       983


Sample Predictions:
Review Text: I formatted this card as NTFS...loaded it up with a bunch of my kids' favorite movie and TV shows. Transfers seem to be pretty speedy, even while playing HD content.
Expected Outcome: Good Comment
Model Prediction: Good Comment
-----------------------------------------
Review Text: Within an hour of using the memory card, it failed and died. it kept saying &#34;Memory error.&#34;I could never get my dashcam camera to record any