# Sentiment Analysis

## Imports and load the data


In [47]:
# Set if runing local or usung colab
local = False

In [48]:
# Install vaderSentiment
if not local:
    !pip install vaderSentiment



In [49]:
# Impoirts
import json
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification, BertConfig
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from torch.optim import AdamW
import torch.nn.functional as F

# Set random seed
random_seed = 1

In [50]:
# Load data
if local:
    # Load csv file
    df = pd.read_csv('restaurant-reviews.csv')

else:
    # Authorize access to your google drive storage
    from google.colab import drive
    drive.mount('/content/drive')

    # Load csv file
    df = pd.read_csv('/content/drive/My Drive/SMA/restaurant-reviews.csv')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Initial Data Analysis

In [51]:
# Define a function to do initial data analysis (IDA)
def initial_data_analysis(df,name):
    """
    Gives information about the dataframe for a quick overview.
    Args:
        df (pandas.DataFrame): The dataframe to be analysed.
        name (str): The name of the dataframe.
    Returns:
        None
    """
    print(f'Initial data analysis for {name}:\n')
    print(f'Shape: {df.shape}\n')

    # look at distribution of the ratings
    print('Distribution of the ratings:')
    print(df['rating'].value_counts().sort_index())


    column_name = []
    dtype = []
    count = []
    unique = []
    missing_values = []
    # create a list of column names, data types, number of non-null values, number of unique values and number of missing values
    for column in df.columns:
        column_name.append(column)
        dtype.append(df[column].dtype)
        count.append(len(df[column]))
        unique.append(len(df[column].unique()))
        missing_values.append(df[column].isna().sum())

    # create a dataframe consisting of the lists
    overview_values = pd.DataFrame({'column_name': column_name,'dtype': dtype,'count': count,'unique': unique,'missing_values': missing_values})
    display(overview_values)

    # sum up all the values in missing_values to get the total number of missing values
    missing_val  =  sum(missing_values) #sum of missing values
    print(f'Sum of missing values: {missing_val}\n')
    total_cells = np.prod(df.shape) # get total number of values in the dataframe
    print(f'Percentage of null values: {missing_val/total_cells*100:.2f}%\n')

    #check for duplicates
    print('Number of duplicates of each class:')
    print(df[df.duplicated()].groupby('rating')['rating'].count())
    print(' ')

    # calculate the percentage of the of the number of duplicates of each class
    print('Percentage of duplicates of each rating:')
    print(df[df.duplicated()].groupby('rating')['rating'].count()/df['rating'].value_counts()*100)
    print(' ')

    print('Head:')
    display(df.head())

    # get descriptive statistics for the numerical columns
    print('Discribe:')
    display(df.describe().round(2))
    print(' ') # do a linebreak

In [52]:
initial_data_analysis(df,'Restaurant reviews')

Initial data analysis for Restaurant reviews:

Shape: (1000, 5)

Distribution of the ratings:
1.0     40
2.0     54
3.0    104
4.0    333
5.0    469
Name: rating, dtype: int64


Unnamed: 0,column_name,dtype,count,unique,missing_values
0,name,object,1000,129,0
1,restaurant_url,object,1000,129,0
2,title,object,1000,919,0
3,text,object,1000,946,0
4,rating,float64,1000,5,0


Sum of missing values: 0

Percentage of null values: 0.00%

Number of duplicates of each class:
rating
1.0     3
2.0     5
3.0     4
4.0    19
5.0    23
Name: rating, dtype: int64
 
Percentage of duplicates of each rating:
1.0    7.500000
2.0    9.259259
3.0    3.846154
4.0    5.705706
5.0    4.904051
Name: rating, dtype: float64
 
Head:


Unnamed: 0,name,restaurant_url,title,text,rating
0,Manufactur,https://www.tripadvisor.com/Restaurant_Review-...,Best in Kiel,The absolutely best restaurant in the town of ...,5.0
1,Manufactur,https://www.tripadvisor.com/Restaurant_Review-...,"Simply, tasty and very good",Tasty and high quality food! A “healthier”way ...,5.0
2,Manufactur,https://www.tripadvisor.com/Restaurant_Review-...,Delicious fast food!,The food was more than we asked for and we whe...,5.0
3,Manufactur,https://www.tripadvisor.com/Restaurant_Review-...,Manufactur,They have some amazing service amzing food and...,5.0
4,Manufactur,https://www.tripadvisor.com/Restaurant_Review-...,clear but appealing menu: you will find what y...,Manufaktur is really a nice small self service...,5.0


Discribe:


Unnamed: 0,rating
count,1000.0
mean,4.14
std,1.06
min,1.0
25%,4.0
50%,4.0
75%,5.0
max,5.0


 


## Splitting the data into train and test sets

In [53]:
# Split data into features and target

# features
#X = df['text'] # just use text
X = df['title'] + ' ' + df['text'] # Combine title and text

# target
y = df['rating'] # target

In [54]:
# Create a single train-test split with stratification using the subset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=random_seed, stratify=y, shuffle=True)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((600,), (400,), (600,), (400,))

In [55]:
# Create an empty DataFrame to store performance scores
df_performance = pd.DataFrame(columns=['approach', 'accuracy', 'recall', 'precision', 'f1'])

def calculate_scores(y_true, y_pred, approach):
    """
    Calculates accuracy, recall, precision, and F1 score and returns the scores as a dictionary.
    Args:
        y_true (pandas.Series): The true labels.
        y_pred (pandas.Series): The predicted labels.
        approach (str): The name of the approach.
    Returns:
        dict: A dictionary containing the performance scores.
    """
    # Calculate accuracy, recall, precision, and F1 score
    accuracy = accuracy_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred, average='macro')
    precision = precision_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    # Create a dictionary with the scores
    scores_dict = {'approach': approach, 'accuracy': accuracy, 'recall': recall, 'precision': precision, 'f1': f1}

    return scores_dict

## 1. Heuristic approach with VaderSentiment Analysis

Description of vaderSentiment Algorithm:

The vaderSentiment algorithm (Valence Aware Dictionary and sentiment Reasoner), is a heuristic-based approach for sentiment analysis.

The vanderSentiment algorithm assesses the polarity of a text wtith the following steps:

<b>1. Lexicon-Based Scoring:</b> VADER uses a pre-built lexicon (dictionary) of words with associated sentiment scores ranging from -1 (most negative) to 1 (most positive). It also includes intensity modifiers to account for sentiment strength.

<b>2. Polarity Detection:</b> The algorithm analyzes text considering individual words, phrases, and context. It detects positive and negative sentiment indicators, neutral words, and handles:
- *Polarity Shifting:* Recognizes when words modify nearby words' sentiments, e.g., "not good" is negative due to "not."
- *Negations:* Identifies negations that reverse sentiments, e.g., "don't like" is negative due to "don't."
- *Amplifiers and Dampeners:* Accounts for intensifiers (e.g., "extremely good") and dampeners in sentiment assessment.
- *Exclamation Marks and Punctuation:* Considers punctuation, especially exclamation marks, as indicators of sentiment intensity.
- *Capitalization:* Acknowledges capitalization's impact on sentiment intensity.
- *Emojis and Emoticons:* Recognizes and interprets these symbols for sentiment analysis.

<b>3. Sentiment Intensity:</b> VADER assesses intensity using capitalization, punctuation, and other features.

<b>4. Compound Score:</b> After analysis, VADER generates a compound sentiment score between -1 and 1:
    Compound score > 0: Positive Sentiment
    Compound score < 0: Negative Sentiment
    Compound score = 0: Neutral Sentiment

<b> Note: </b> For VanderSentiment Analysis no splitting of data is required because no model is trained.To compare the results with the following approaches the test data is used.

In [56]:
# Function to map compound score to a rating scale (1-5)

def map_sentiment_score_to_rating(compound_score):
    if compound_score >= 0.6:
        return 5
    elif compound_score >= 0.2:
        return 4
    elif compound_score >= -0.2:
        return 3
    elif compound_score >= -0.6:
        return 2
    else: # compound_score < -0.6
        return 1

In [57]:
# Initialize the vaderSentiment analyzer
analyzer = SentimentIntensityAnalyzer()


# Create an empty Pandas Series to store the sentiment ratings
sentiment_ratings_heuristic  = pd.Series()

for index, row in X_test.items():  # Iterate through the Series using iteritems()
    # Calculate the sentiment score using vaderSentiment
    sentiment_scores = analyzer.polarity_scores(row)

    # Get the compound score
    compound_score = sentiment_scores['compound']

    # Map the compound score to a rating scale from 1 to 5
    sentiment_rating = map_sentiment_score_to_rating(compound_score)

    # Append the sentiment rating to the Series
    sentiment_ratings_heuristic.at[index] = sentiment_rating


  sentiment_ratings_heuristic  = pd.Series()


In [58]:
# Calculate the performance scores
scores = calculate_scores(y_test, sentiment_ratings_heuristic, 'Heuristic')

# Concatenate with the DataFrame containing the scores
df_performance = pd.concat([df_performance, pd.DataFrame([scores])], ignore_index=True)

## 2. Finetuned transformer model

**Why this model?:**
The [following model](https://huggingface.co/LiYuan/amazon-review-sentiment-analysis?text=I+aet+a+several+times+there+and+it+was+everytime+very+good.+The+waitresses+are+polite+and+nice+an+the+steak+was+very+good) was chosen because it was trained on reviews, not on restaurant reviews but on amazon reviews. So the task is similar to the one here. The second reason this model was chosen is because it retun a rating from 1 to 5, so not further mapping is required.


In [59]:
# Choose a model
model_name = "LiYuan/amazon-review-sentiment-analysis"

# Initialize the model
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

The model can only handle token indices sequence lengthof 512 tokens, therefore the tokes are  truncated  to a length of 512 tokens.

In [60]:
# Create an empty Pandas Series to store the sentiment ratings
sentiment_ratings_pre_trained = pd.Series()

for index, row in X_test.items():
    # Tokenize and truncate the input reviews
    tokenized_inputs = tokenizer(
        row,
        return_tensors="pt",    # Return PyTorch tensors
        truncation=True,        # Truncate if needed
        max_length=512,         # Max length
        padding=False           # Do not add padding
    )

    # Get the model outputs
    outputs = model(**tokenized_inputs)

    # Apply softmax to get the probabilities for each class
    probs = F.softmax(outputs.logits, dim=1)

    # Map probabilities to star ratings (1 to 5)
    star_ratings = (torch.argmax(probs, dim=1) + 1).item()

    # Append the sentiment rating to the Series
    sentiment_ratings_pre_trained.at[index] = star_ratings


  sentiment_ratings_pre_trained = pd.Series()


In [61]:
# Calculate the performance scores
scores = calculate_scores(y_test, sentiment_ratings_pre_trained, 'Already trained model')

# Concatenate with the DataFrame containing the scores
df_performance = pd.concat([df_performance, pd.DataFrame([scores])], ignore_index=True)

## 3. Train a model yourself

In [62]:
# Define a custom estimator class
class BERTClassifier:
    def __init__(self, model_name, num_labels, lr, eps, epochs, batch_size):
        self.model_name = model_name
        self.num_labels = num_labels
        self.lr = lr
        self.eps = eps
        self.epochs = epochs
        self.batch_size = batch_size

        # Load the pre-trained BERT model and tokenizer
        self.config = BertConfig.from_pretrained(model_name, num_labels=num_labels)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertForSequenceClassification.from_pretrained(model_name, config=self.config)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def set_params(self, **params):
        # Dummy method to satisfy scikit-learn's requirements
        return self

    def fit(self, X, y):
        # Tokenize the training data
        tokenized_data_train = self.tokenizer(X.to_list(), padding=True, truncation=True, return_tensors='pt', max_length=512)

        # Create DataLoader for training data
        train_data = TensorDataset(tokenized_data_train['input_ids'], torch.tensor(y.values, dtype=torch.long))
        train_sampler = RandomSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=self.batch_size)

        # Define optimizer and set other training parameters
        optimizer = AdamW(self.model.parameters(), lr=self.lr, eps=self.eps)

        # Fine-tune the model
        self.model.train()
        for epoch in range(self.epochs):
            for batch in train_dataloader:
                batch = tuple(t.to(self.device) for t in batch)
                inputs = {'input_ids': batch[0], 'labels': batch[1]}
                outputs = self.model(**inputs)
                loss = outputs.loss
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

    def predict(self, X):
        # Tokenize the test data
        tokenized_data_test = self.tokenizer(X.to_list(), padding=True, truncation=True, return_tensors='pt', max_length=512)

        # Create DataLoader for testing data
        test_inputs = {'input_ids': tokenized_data_test['input_ids'].to(self.device)}

        # Make predictions and evaluate the model's performance
        self.model.eval()
        with torch.no_grad():
            test_outputs = self.model(**test_inputs)

        # Apply softmax to get the probabilities for each class
        probs = F.softmax(test_outputs.logits, dim=1)

        # Add 1 to get star ratings (1 to 5) instead of labels (0 to 4)
        sentiment_ratings_fine_tuned = torch.argmax(probs, dim=1) + 1

        # Convert the predictions to a list
        predictions = sentiment_ratings_fine_tuned.tolist()

        return predictions

    def get_params(self, deep=True):
        # Return the parameter dictionary of the estimator
        return {
            'model_name': self.model_name,
            'num_labels': self.num_labels,
            'lr': self.lr,
            'eps': self.eps,
            'epochs': self.epochs,
            'batch_size': self.batch_size
        }

    def state_dict(self):
        # Return the state dictionary of the estimator (Is a snapshot of the estimator’s parameters and internal state.)
        return {
            'model_state_dict': self.model.state_dict(),  
        }


In [63]:
# Adjust ratings to a 0-4 scale
y_train = y_train - 1
y_test = y_test - 1

# Define the hyperparameter grid
param_grid = {
    'model_name': ['bert-base-uncased'],
    'num_labels': [5],
    'lr': [2e-5, 3e-5],
    'eps': [1e-7, 1e-6],
    'epochs': [3, 4],
    'batch_size': [8,16]
}

# Initialize the custom estimator
custom_estimator = BERTClassifier(model_name='gaunernst/bert-small-uncased', num_labels=5, lr=2e-5, eps=1e-8, epochs=4, batch_size=16)

# Initialize the cross-validator
cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=random_seed)

# Create the GridSearchCV object
grid_search = GridSearchCV(
    estimator=custom_estimator, # Use the custom estimator
    param_grid=param_grid, # Use the hyperparameter grid
    scoring='f1_macro',  # Use F1 score for evaluation
    cv=cv,  # Use the cross-validator
    verbose=2 # Print verbose output
)

# Fit the GridSearchCV object to the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and corresponding F1 score
print("Best Hyperparameters: ", grid_search.best_params_)
print("Best F1 Score: ", grid_search.best_score_)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fitting 2 folds for each of 16 candidates, totalling 32 fits


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.1s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=3, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.1s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=8, epochs=4, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.1s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=3, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-07, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-07, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-06, lr=2e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[CV] END batch_size=16, epochs=4, eps=1e-06, lr=3e-05, model_name=bert-base-uncased, num_labels=5; total time=   6.0s


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at gaunernst/bert-small-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Best Hyperparameters:  {'batch_size': 8, 'epochs': 3, 'eps': 1e-06, 'lr': 3e-05, 'model_name': 'bert-base-uncased', 'num_labels': 5}
Best F1 Score:  0.0533696774996079


In [64]:
# Get the best model from GridSearchCV
best_estimator = grid_search.best_estimator_

# Get predictions from the best model
sentiment_ratings_fine_tuned = best_estimator.predict(X_test)

In [65]:
# Save the fine-tuned model
if local:
    # Saving the model locally
    torch.save(best_estimator.state_dict(), 'bert-small-uncased-finetuned-restaurant-reviews.pth')
else:
    # Saving the model to Google Drive
    torch.save(best_estimator.state_dict(), 'drive/MyDrive/SMA/bert-small-uncased-finetuned-restaurant-reviews.pth')

In [None]:
# Calculate the performance scores
scores = calculate_scores(y_test, sentiment_ratings_fine_tuned, 'Fine-tuned small BERT')

# Concatenate with the DataFrame containing the scores
df_performance = pd.concat([df_performance, pd.DataFrame([scores])], ignore_index=True)

In [67]:
# Print the table with the performance scores
df_performance

Unnamed: 0,approach,accuracy,recall,precision,f1
0,Heuristic,0.465,0.313494,0.260608,0.259106
1,Already trained model,0.6125,0.556964,0.594299,0.510371
2,Fine-tuned small BERT,0.1725,0.06117,0.055288,0.058081


# Summary

**Main Findings:**

*Heuristic Approach:* This approach showed moderate performance with an accuracy of 46.50%. It demonstrated decent recall, precision, and F1 score, indicating its ability to capture sentiment to some extent.

*Already Trained Model:*  The use of an already trained model resulted in the best performance among the three approaches. It achieved an accuracy of 61.25% and demonstrated higher recall, precision, and F1 score, suggesting its effectiveness in sentiment analysis.

*Fine-tuned Small BERT:* The fine-tuned small BERT model showed the lowest performance with an accuracy of 17.25%. It had the lowest recall, precision, and F1 score, indicating that further adjustments or hyperparameter tuning may be needed.

**Recommendations:**

*Choose the Already Trained Model:* The "Already Trained Model" approach outperformed the other methods in terms of accuracy and overall sentiment analysis metrics. I recommend to adopt this approach for predicting restaurant review sentiments.

*Improve Fine-tuned BERT:* I suggest to explore if further improvements could be achieved. This can include additional fine-tuning iterations, adjusting hyperparameters, or considering a larger BERT model. Also training the model with more data could be useful to improve the performance.

**Suggestions for Future Improvements:**

*Data Augmentation:* Collecting more diverse and extensive restaurant review data could improve the performance of the self trained model.

*Hyperparameter Tuning:* Continue experimenting with different hyperparameter settings, such as learning rates, batch sizes, and optimization algorithms, could lead to better model performance.

*Sentiment Analysis for Title and Text:* Currently, both title and text are used for sentiment analysis. Using only one of them could lead to better results. 

*User Feedback Loop:* Utilize user feedback and continuously update the model to adapt to changing customer sentiments and language trends.