In [2]:
import os
import random
import string
from collections import Counter

import numpy as np
import pandas as pd
from tqdm import tqdm, auto as tqdm_auto

import nltk
from nltk import pos_tag
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer
nltk.download('punkt')

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

from gensim.models import Word2Vec

from datasets import Dataset, DatasetDict
from transformers import (BertConfig, BertTokenizerFast, BertForSequenceClassification, 
                          TrainingArguments, Trainer, AutoTokenizer, AutoModelForSequenceClassification, pipeline)
from evaluate import load
import torch


In [3]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sun Mar 24 12:39:01 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
# {"username":"____","key":"___"}

In [5]:
dataset = 'https://www.kaggle.com/datasets/snap/amazon-fine-food-reviews'
od.download(dataset)

Skipping, found downloaded files in "./amazon-fine-food-reviews" (use force=True to force download)


In [6]:
df = pd.read_csv('amazon-fine-food-reviews/Reviews.csv')

In [7]:
df

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...
...,...,...,...,...,...,...,...,...,...,...
568449,568450,B001EO7N10,A28KG5XORO54AY,Lettie D. Carter,0,0,5,1299628800,Will not do without,Great for sesame chicken..this is a good if no...
568450,568451,B003S1WTCU,A3I8AFVPEE8KI5,R. Sawyer,0,0,2,1331251200,disappointed,I'm disappointed with the flavor. The chocolat...
568451,568452,B004I613EE,A121AA1GQV751Z,"pksd ""pk_007""",2,2,5,1329782400,Perfect for our maltipoo,"These stars are small, so you can give 10-15 o..."
568452,568453,B004I613EE,A3IBEVCTXKNOH,"Kathy A. Welch ""katwel""",1,1,5,1331596800,Favorite Training and reward treat,These are the BEST treats for training and rew...


In [8]:
df['Label'] = ['positive' if score >= 3 else 'negative' for score in df['Score']]

In [9]:
positive_reviews = df[df['Label'] == 'positive'].shape[0]
negative_reviews = df[df['Label'] == 'negative'].shape[0]

print(f"Number of positive reviews: {positive_reviews}")
print(f"Number of negative reviews: {negative_reviews}")

Number of positive reviews: 486417
Number of negative reviews: 82037


In [10]:
df_downsampled = df.groupby('Label').apply(pd.DataFrame.sample, n=min(positive_reviews, negative_reviews), random_state=0)
df_downsampled = df_downsampled.reset_index(drop=True)

In [11]:
df = df_downsampled

In [12]:
positive_reviews = df[df['Label'] == 'positive'].shape[0]
negative_reviews = df[df['Label'] == 'negative'].shape[0]

print(f"Number of positive reviews: {positive_reviews}")
print(f"Number of negative reviews: {negative_reviews}")

Number of positive reviews: 82037
Number of negative reviews: 82037


In [18]:
df_sampled = df.sample(frac=0.15, random_state=0)

X_train, X_test, y_train, y_test = train_test_split(df_sampled['Text'], df_sampled['Label'], test_size=0.2, random_state=0)

print(f"Number of training examples: {X_train.shape[0]}")
print(f"Number of testing examples: {X_test.shape[0]}")

Number of training examples: 19688
Number of testing examples: 4923


# 1) TFIDF approach

In [15]:
tfidf_vectorizer = TfidfVectorizer(max_df=0.7)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)

In [17]:
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(tfidf_train, y_train)

predictions = lr_model.predict(tfidf_test)

lr_tfidf_accuracy = accuracy_score(y_test, predictions)
lr_tfidf_f1 = f1_score(y_test, predictions, average='binary', pos_label='positive')
lr_tfidf_recall = recall_score(y_test, predictions, average='binary', pos_label='positive')
lr_tfidf_precision = precision_score(y_test, predictions, average='binary', pos_label='positive')

print(f"Accuracy: {lr_tfidf_accuracy}")
print(f"F1 Score: {lr_tfidf_f1}")
print(f"Recall: {lr_tfidf_recall}")
print(f"Precision: {lr_tfidf_precision}")

Accuracy: 0.8596384318504977
F1 Score: 0.8569654315876629
Recall: 0.8448979591836735
Precision: 0.869382612347753


In [18]:
svm_model = SVC(kernel='linear')
svm_model.fit(tfidf_train, y_train)

predictions = svm_model.predict(tfidf_test)

svm_tfidf_accuracy = accuracy_score(y_test, predictions)
svm_tfidf_f1 = f1_score(y_test, predictions, average='binary', pos_label='positive')
svm_tfidf_recall = recall_score(y_test, predictions, average='binary', pos_label='positive')
svm_tfidf_precision = precision_score(y_test, predictions, average='binary', pos_label='positive')

print(f"Accuracy: {svm_tfidf_accuracy}")
print(f"F1 Score: {svm_tfidf_f1}")
print(f"Recall: {svm_tfidf_recall}")
print(f"Precision: {svm_tfidf_precision}")

Accuracy: 0.8616697135892748
F1 Score: 0.8577991229901858
Recall: 0.8383673469387755
Precision: 0.8781530568619068


In [19]:
gbm_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gbm_model.fit(tfidf_train, y_train)

predictions_tfidf_gbm = gbm_model.predict(tfidf_test)

gbm_tfidf_accuracy = accuracy_score(y_test, predictions)
gbm_tfidf_f1 = f1_score(y_test, predictions, average='binary', pos_label='positive')
gbm_tfidf_recall = recall_score(y_test, predictions, average='binary', pos_label='positive')
gbm_tfidf_precision = precision_score(y_test, predictions, average='binary', pos_label='positive')

print(f"Accuracy: {gbm_tfidf_accuracy}")
print(f"F1 Score: {gbm_tfidf_f1}")
print(f"Recall: {gbm_tfidf_recall}")
print(f"Precision: {gbm_tfidf_precision}")

Accuracy: 0.8616697135892748
F1 Score: 0.8577991229901858
Recall: 0.8383673469387755
Precision: 0.8781530568619068


# 2) Classification by using word2vec

In [20]:
X_train_tokens = [word_tokenize(text.lower()) for text in X_train]
X_test_tokens = [word_tokenize(text.lower()) for text in X_test]

word2vec_model = Word2Vec(sentences=X_train_tokens, vector_size=100, window=5, min_count=1, workers=4)

def document_vector(word2vec_model, doc):
    doc = [word for word in doc if word in word2vec_model.wv.index_to_key]
    if len(doc) == 0:
        return np.zeros(word2vec_model.vector_size)
    else:
        return np.mean(word2vec_model.wv[doc], axis=0)

X_train_vec = np.array([document_vector(word2vec_model, doc) for doc in X_train_tokens])
X_test_vec = np.array([document_vector(word2vec_model, doc) for doc in X_test_tokens])

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [21]:
log_reg_model = LogisticRegression(max_iter=1000)
log_reg_model.fit(X_train_vec, y_train)

predictions = log_reg_model.predict(X_test_vec)
w2v_lr_accuracy = accuracy_score(y_test, predictions)
w2v_lr_f1 = f1_score(y_test, predictions, average='binary', pos_label='positive')
w2v_lr_recall = recall_score(y_test, predictions, average='binary', pos_label='positive')
w2v_lr_precision = precision_score(y_test, predictions, average='binary', pos_label='positive')

print(f"Accuracy: {w2v_lr_accuracy}")
print(f"F1 Score: {w2v_lr_f1}")
print(f"Recall: {w2v_lr_recall}")
print(f"Precision: {w2v_lr_precision}")

Accuracy: 0.7917936217753403
F1 Score: 0.7882668870068168
Recall: 0.7787755102040816
Precision: 0.7979924717691342


In [22]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_vec, y_train)

rf_predictions = rf_model.predict(X_test_vec)

w2v_rf_accuracy = accuracy_score(y_test, rf_predictions)
w2v_rf_f1 = f1_score(y_test, rf_predictions, average='binary', pos_label='positive')
w2v_rf_recall = recall_score(y_test, rf_predictions, average='binary', pos_label='positive')
w2v_rf_precision = precision_score(y_test, rf_predictions, average='binary', pos_label='positive')

print(f"Accuracy: {w2v_rf_accuracy}")
print(f"F1 Score: {w2v_rf_f1}")
print(f"Recall: {w2v_rf_recall}")
print(f"Precision: {w2v_rf_precision}")

Accuracy: 0.7812309567336989
F1 Score: 0.7770647898985716
Recall: 0.7661224489795918
Precision: 0.7883242335153297


In [23]:
svm_model = SVC(kernel='linear')

svm_model.fit(X_train_vec, y_train)

predictions = svm_model.predict(X_test_vec)

w2v_svm_accuracy = accuracy_score(y_test, predictions)
w2v_svm_precision = precision_score(y_test, predictions, average='macro')
w2v_svm_recall = recall_score(y_test, predictions, average='macro')
w2v_svm_f1 = f1_score(y_test, predictions, average='macro')

print(f'Accuracy: {w2v_svm_accuracy}')
print(f'Precision: {w2v_svm_precision}')
print(f'Recall: {w2v_svm_recall}')
print(f'F1 Score: {w2v_svm_f1}')

Accuracy: 0.7915904936014625
Precision: 0.7920454917513666
Recall: 0.7914948381293481
F1 Score: 0.7914686491919171


# 3) BERT (without fine-tune) for review classification

In [24]:
# !pip install -q transformers datasets
# !pip install -q tensorflow

In [26]:
classifier = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [27]:
tqdm.pandas()

def process_in_batches(text_series, batch_size=32):
    results = []
    for i in tqdm(range(0, len(text_series), batch_size)):
        batch = text_series[i:i+batch_size]
        batch_results = [classifier(text, truncation=True)[0]['label'] for text in batch]
        results.extend(batch_results)
    return results

batch_size = 512
X_test_processed = process_in_batches(X_test.str.slice(0, 512), batch_size=batch_size)

100%|██████████| 10/10 [10:52<00:00, 65.29s/it]


In [28]:
label_mapping = {'POSITIVE': 'positive', 'NEGATIVE': 'negative'}
mapped_labels = [label_mapping[label] for label in X_test_processed]

comparison_df = pd.DataFrame({'Actual Label': y_test.str.lower(), 'Predicted Label': mapped_labels})

bert_wo_ft_accuracy = accuracy_score(comparison_df['Actual Label'], comparison_df['Predicted Label'])
bert_wo_ft_precision = precision_score(comparison_df['Actual Label'], comparison_df['Predicted Label'], pos_label='positive')
bert_wo_ft_recall = recall_score(comparison_df['Actual Label'], comparison_df['Predicted Label'], pos_label='positive')
bert_wo_ft_f1 = f1_score(comparison_df['Actual Label'], comparison_df['Predicted Label'], pos_label='positive')

print(f"Accuracy: {bert_wo_ft_accuracy}")
print(f"Precision: {bert_wo_ft_precision}")
print(f"Recall: {bert_wo_ft_recall}")
print(f"F1 Score: {bert_wo_ft_f1}")

       Actual Label Predicted Label
125485     positive        positive
32396      negative        negative
108819     positive        negative
56031      negative        positive
122843     positive        positive
Accuracy: 0.8324192565508836
Precision: 0.8739070409572021
Recall: 0.7751020408163265
F1 Score: 0.8215444516547696


# 4) BERT (with fine-tune) for review classification

In [13]:
!pip install -q transformers[torch]

In [19]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [21]:
def set_seed(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

set_seed(1)

In [22]:
model_name = "bert-base-uncased"
tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)

train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=512)
test_encodings = tokenizer(list(X_test), truncation=True, padding=True, max_length=512)

In [23]:
label_mapping = {'negative': 0, 'positive': 1}
y_train_mapped = [label_mapping[label] for label in y_train]
y_test_mapped = [label_mapping[label] for label in y_test]

In [24]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = CustomDataset(train_encodings, y_train_mapped)
test_dataset = CustomDataset(test_encodings, y_test_mapped)

In [25]:
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(np.unique(y_train))).to("cuda")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=20,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch"
)

In [35]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision = precision_score(labels, preds, average='binary')
    recall = recall_score(labels, preds, average='binary')
    f1 = f1_score(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1946,0.310664,0.912249,0.912016,0.910163,0.913878
2,0.3251,0.352895,0.918749,0.916002,0.943339,0.890204


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1946,0.310664,0.912249,0.912016,0.910163,0.913878
2,0.3251,0.352895,0.918749,0.916002,0.943339,0.890204
3,0.1212,0.433375,0.924639,0.923647,0.931507,0.915918


TrainOutput(global_step=7383, training_loss=0.18156431705097917, metrics={'train_runtime': 6659.7575, 'train_samples_per_second': 8.869, 'train_steps_per_second': 1.109, 'total_flos': 1.554039137378304e+16, 'train_loss': 0.18156431705097917, 'epoch': 3.0})

In [36]:
results = trainer.evaluate()

bert_fine_tune_accuracy = results.get('eval_accuracy', 0)
bert_fine_tune_f1 = results.get('eval_f1', 0)
bert_fine_tune_precision = results.get('eval_precision', 0)
bert_fine_tune_recall = results.get('eval_recall', 0)

print("Evaluation Results:")
print("-" * 20)
print(f"Accuracy:  {bert_fine_tune_accuracy:.3f}")
print(f"F1 Score:  {bert_fine_tune_f1:.3f}")
print(f"Precision: {bert_fine_tune_precision:.3f}")
print(f"Recall:    {bert_fine_tune_recall:.3f}")

Evaluation Results:
--------------------
Accuracy:  0.925
F1 Score:  0.924
Precision: 0.932
Recall:    0.916


# 5) BERT (with LoRA) for review classification

In [37]:
!pip install -q evaluate peft datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [41]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

label_to_id = {'positive': 1, 'negative': 0}
train_df = pd.DataFrame({'text': X_train, 'labels': y_train.map(label_to_id)})
test_df = pd.DataFrame({'text': X_test, 'labels': y_test.map(label_to_id)})

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

tokenized_datasets = DatasetDict({
    'train': train_dataset.map(tokenize_function, batched=True),
    'test': test_dataset.map(tokenize_function, batched=True)
})

Map:   0%|          | 0/19688 [00:00<?, ? examples/s]

Map:   0%|          | 0/4923 [00:00<?, ? examples/s]

In [42]:
config = BertConfig.from_pretrained("bert-base-cased", num_labels=2)
config.lora_r = 4
config.lora_alpha = 32
config.apply_lora = True
config.lora_dropout = 0.1

model = BertForSequenceClassification.from_pretrained("bert-base-cased", config=config)

metric = load("accuracy")

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [43]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
)

trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3323,0.348096,0.903311
2,0.2033,0.397424,0.904124
3,0.1081,0.46504,0.910827


TrainOutput(global_step=7383, training_loss=0.21454708508064346, metrics={'train_runtime': 2024.8869, 'train_samples_per_second': 29.169, 'train_steps_per_second': 3.646, 'total_flos': 3885097843445760.0, 'train_loss': 0.21454708508064346, 'epoch': 3.0})

In [44]:
results = trainer.evaluate()

bert_lora_accuracy = results.get('eval_accuracy', 0)
bert_lora_f1 = results.get('eval_f1', 0)
bert_lora_precision = results.get('eval_precision', 0)
bert_lora_recall = results.get('eval_recall', 0)

print("Evaluation Results:")
print("-" * 20)
print(f"Accuracy:  {bert_fine_tune_accuracy:.3f}")
print(f"F1 Score:  {bert_fine_tune_f1:.3f}")
print(f"Precision: {bert_fine_tune_precision:.3f}")
print(f"Recall:    {bert_fine_tune_recall:.3f}")

Evaluation Results:
--------------------
Accuracy:  0.925
F1 Score:  0.924
Precision: 0.932
Recall:    0.916


# 6) Results Analysis

In [46]:
scores = pd.DataFrame({
    "Method": ["TFIDF Logistic Regression", "TFIDF SVM", "TFIDF GBM",
               "Word2Vec Logistic Regression", "Word2Vec Random Forest", "Word2Vec SVM",
               "Pre-trained BERT", "Fine-Tuned BERT", "BERT with LoRa"],
    "Accuracy": [0.860, 0.862, 0.862,
                 0.792, 0.781, 0.792,
                 0.832, 0.925, 0.925],
    "Precision": [0.869, 0.878, 0.878,
                  0.798, 0.788, 0.792,
                  0.874, 0.932, 0.932],
    "Recall": [0.845, 0.838, 0.838,
               0.779, 0.766, 0.791,
               0.775, 0.916, 0.916],
    "F1 Score": [0.857, 0.858, 0.858,
                 0.788, 0.777, 0.791,
                 0.822, 0.924, 0.924]
})

scores

Unnamed: 0,Method,Accuracy,Precision,Recall,F1 Score
0,TFIDF Logistic Regression,0.86,0.869,0.845,0.857
1,TFIDF SVM,0.862,0.878,0.838,0.858
2,TFIDF GBM,0.862,0.878,0.838,0.858
3,Word2Vec Logistic Regression,0.792,0.798,0.779,0.788
4,Word2Vec Random Forest,0.781,0.788,0.766,0.777
5,Word2Vec SVM,0.792,0.792,0.791,0.791
6,Pre-trained BERT,0.832,0.874,0.775,0.822
7,Fine-Tuned BERT,0.925,0.932,0.916,0.924
8,BERT with LoRa,0.925,0.932,0.916,0.924
