Imports

In [None]:
!pip install datasets
!pip install transformers==4.29.0
!pip install evaluate
!pip install --upgrade accelerate

from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

import numpy as np
import pandas as pd
import evaluate

from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Step 1: Preprocessing by tokenizing the text and converting it to numerical token IDs (the model maps these IDs to embeddings internally).

In [None]:
# Checkpoint name shared by the tokenizer here and the model loaded in Step 2
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the dataset registered under the name "yelp_review_full"
dataset = load_dataset("yelp_review_full")

# Preprocessing function applied to the dataset in batches
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Calls the module-level ``tokenizer`` with ``padding="max_length"`` and
    ``truncation=True`` and returns whatever the tokenizer produces.
    """
    encoded_batch = tokenizer(examples["text"], padding="max_length", truncation=True)
    return encoded_batch

# Draw the fixed-size samples first, then tokenize only those rows.
# Tokenizing the complete splits before sampling processes every review even
# though only 3000 per split are kept. The seeded shuffle is applied to splits
# of the same length as before, so the same rows should be selected
# (NOTE(review): relies on Dataset.shuffle depending only on seed and length).
sampled_splits = {
    "train": dataset["train"].shuffle(seed=42).select(range(3000)),
    "test": dataset["test"].shuffle(seed=42).select(range(3000)),
}

# Preprocess the sampled text.
# NOTE: tokenized_datasets now holds only the sampled subsets, stored in a
# plain dict keyed by split name, rather than the fully tokenized dataset.
tokenized_datasets = {
    split_name: split.map(tokenize_function, batched=True)
    for split_name, split in sampled_splits.items()
}

# Train and eval datasets used by the rest of the notebook
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]



  0%|          | 0/2 [00:00<?, ?it/s]



Map:   0%|          | 0/50000 [00:00<?, ? examples/s]



Step 2: Implementing a deep learning model for sentiment analysis using the BERT pre-trained transformer model.

In [None]:
# Instantiate the pretrained checkpoint with a sequence-classification head;
# the number of output labels is passed explicitly.
NUM_LABELS = 5
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=NUM_LABELS,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Step 3: Training the model on the preprocessed dataset and evaluating its performance using accuracy, precision, recall, and F1-score. 

In [None]:
# Training hyperparameters; evaluation runs at the end of every epoch
batch_size = 10
num_epochs = 7
learning_rate = 0.0001

# Number of training steps per epoch, as an integer (ceiling division).
# True division (`/`) produced a float, and a non-integral one whenever the
# sample size is not a multiple of the batch size, which is not a valid step
# count for `logging_steps`. `max(1, ...)` keeps the value usable as a modulus.
# Assumes one device and no gradient accumulation — TODO confirm if that changes.
steps_per_epoch = max(1, (len(train_dataset) + batch_size - 1) // batch_size)

training_args = TrainingArguments(
    output_dir="test_trainer",
    evaluation_strategy="epoch",
    logging_steps=steps_per_epoch,  # log the training loss once per epoch
    per_device_train_batch_size=batch_size,
    num_train_epochs=num_epochs,
    learning_rate=learning_rate,
)

# Metric callback handed to the Trainer
def compute_metrics(eval_pred):
    """Turn a ``(logits, labels)`` pair into a dict of evaluation scores.

    The predicted class is the argmax over the last axis of ``logits``.
    Precision, recall and F1 use ``average='weighted'``.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)

    # The fourth element returned here (support) is not reported
    weighted_scores = precision_recall_fscore_support(
        labels, predicted_classes, average='weighted'
    )

    metrics = {'accuracy': accuracy_score(labels, predicted_classes)}
    metrics.update(zip(('precision', 'recall', 'f1'), weighted_scores[:3]))
    return metrics

# Collect everything the Trainer needs: model, arguments, data and metrics
trainer_config = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": eval_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_config)

# Fine-tune the model; the result stays the cell's last expression so the
# notebook still displays it.
training_output = trainer.train()
training_output




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3084,1.104245,0.516333,0.501039,0.516333,0.494909
2,1.0061,1.127172,0.512,0.588346,0.512,0.495156
3,0.7789,1.092295,0.56,0.590372,0.56,0.561983
4,0.5152,1.300106,0.560333,0.566667,0.560333,0.554981
5,0.3167,1.902629,0.544,0.587623,0.544,0.548753
6,0.1656,2.437926,0.565,0.58195,0.565,0.570156
7,0.0602,2.742888,0.560667,0.587502,0.560667,0.566518


TrainOutput(global_step=2100, training_loss=0.5930179532368978, metrics={'train_runtime': 2808.1716, 'train_samples_per_second': 7.478, 'train_steps_per_second': 0.748, 'total_flos': 5525480991744000.0, 'train_loss': 0.5930179532368978, 'epoch': 7.0})

Step 5: Comparing the performance of my BERT pre-trained transformer model with a logistic regression model.

In [None]:
# Baseline: bag-of-words logistic regression on the same sampled reviews.

# Convert tokenized datasets to pandas DataFrames
train_df = pd.DataFrame(train_dataset)
eval_df = pd.DataFrame(eval_dataset)

# Decode the token IDs back to text, dropping special tokens, so the baseline
# is built from the tokenized (padded/truncated) inputs rather than the raw text.
train_texts = tokenizer.batch_decode(train_df["input_ids"], skip_special_tokens=True)
eval_texts = tokenizer.batch_decode(eval_df["input_ids"], skip_special_tokens=True)

# Bag-of-words features; the vocabulary is fitted on the training texts only
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_texts)
X_eval = vectorizer.transform(eval_texts)

# Define target labels
y_train = train_df["label"]
y_eval = eval_df["label"]

# Train logistic regression model.
# The default iteration limit was reached before convergence (see the solver
# warning in this cell's earlier output), so the limit is raised. If the
# warning still appears, increase it further or scale the features.
# NOTE(review): this rebinds `model`, which until now referred to the BERT
# model; re-running the Trainer cell after this one would pick up the wrong
# object. The name is kept here for compatibility with existing cells.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions on evaluation dataset
pred_eval = model.predict(X_eval)

# Same metrics as reported for the fine-tuned model (weighted averages)
accuracy = accuracy_score(y_eval, pred_eval)
precision, recall, f1, _ = precision_recall_fscore_support(y_eval, pred_eval, average='weighted')

# Print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)
print("\n")

Accuracy: 0.473
Precision: 0.46952058355870363
Recall: 0.473
F1-Score: 0.4710250316122826




STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
