In [40]:
import torch
import accelerate
import transformers
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np
import datasets
from sklearn.metrics import confusion_matrix, accuracy_score

In [23]:
print(np.__version__)
print(torch.__version__)
print(transformers.__version__)
print(datasets.__version__)
print(accelerate.__version__)

1.25.1
2.3.1+cpu
4.41.2
2.20.0
0.31.0


# BERT for Binary classification (Adding last softmax layer at the end)

### Example 1: A few samples (update the whole weights)

In [16]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Sample data
texts = ["I love programming.", "I hate bugs.", "Machine learning is fascinating.", "Debugging is frustrating.", "I enjoy debugging."]
labels = [1, 0, 1, 0, 1]  # 1 for positive, 0 for negative

# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=128)

texts_val = ["I enjoy debugging.", "Programming can be tough."]
val_encodings = tokenizer(texts_val, truncation=True, padding=True, max_length=128)
labels_val = [1, 0]


# Create dataset objects
train_dataset = CustomDataset(train_encodings, labels)
val_dataset = CustomDataset(val_encodings, labels_val)  # Use the same data for validation for this example

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=50,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=5,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Fine-tune the model
trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
5,0.6948,0.700712
10,0.6888,0.699288
15,0.6587,0.697982
20,0.6234,0.691195
25,0.641,0.68163
30,0.6609,0.663044
35,0.6165,0.635565
40,0.5722,0.596927
45,0.5473,0.563259
50,0.513,0.521652


TrainOutput(global_step=50, training_loss=0.6216712760925293, metrics={'train_runtime': 97.6657, 'train_samples_per_second': 2.56, 'train_steps_per_second': 0.512, 'total_flos': 1027777560000.0, 'train_loss': 0.6216712760925293, 'epoch': 50.0})

#### evaluate the model on test dataset

In [19]:
texts_test = ["I enjoy debugging.", "Programming can be tough.", "I hate debugging."]
new_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=128)
labels_test = [1, 0, 0]

test_dataset = CustomDataset(new_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([1, 0, 1])


### Example 2: 100 samples from IMDB (update the whole weights), each sample just 4 tokens

In [25]:
dataset = load_dataset("stanfordnlp/imdb")

train_texts = dataset['train']['text']
train_labels = dataset['train']['label']

test_texts = dataset['test']['text']
test_labels = dataset['test']['label']

In [26]:
labels = train_labels[500:550]+train_labels[15000:15050]
texts = train_texts[500:550]+ train_texts[15000:15050]

In [29]:
len(labels)

100

In [31]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=4)
train_dataset = CustomDataset(train_encodings, labels)

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=5,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
5,0.758,0.704411
10,0.6856,0.702425
15,0.7479,0.699309
20,0.7154,0.694544
25,0.6868,0.688574
30,0.6881,0.680474
35,0.651,0.670621
40,0.7145,0.657397
45,0.6715,0.643004
50,0.6596,0.629927


TrainOutput(global_step=65, training_loss=0.6827584193303035, metrics={'train_runtime': 147.7578, 'train_samples_per_second': 3.384, 'train_steps_per_second': 0.44, 'total_flos': 1027777560000.0, 'train_loss': 0.6827584193303035, 'epoch': 5.0})

#### evaluate the model on test dataset

In [33]:
labels_test = test_labels[500:550]+test_labels[15000:15050]
texts_test = test_texts[500:550]+ test_texts[15000:15050]

In [34]:
test_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=4)
test_dataset = CustomDataset(test_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
        1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1,
        1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        0, 1, 0, 1])


In [42]:
conf_matrix = confusion_matrix(labels_test, predicted_labels.numpy())
conf_matrix

array([[20, 30],
       [23, 27]], dtype=int64)

In [46]:
accuracy = accuracy_score(labels_test, predicted_labels.numpy())
print(f'{accuracy*100} %')

47.0 %


### Example 3: 100 samples from IMDB (update the whole weights), each sample just 16 tokens

In [47]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=16)
train_dataset = CustomDataset(train_encodings, labels)

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=5,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
5,0.7532,0.866059
10,0.919,0.853293
15,0.8565,0.829978
20,0.819,0.802827
25,0.8478,0.766256
30,0.7812,0.727027
35,0.7246,0.691155
40,0.7119,0.654524
45,0.6483,0.628223
50,0.6855,0.602586


TrainOutput(global_step=65, training_loss=0.7346512574415941, metrics={'train_runtime': 276.6476, 'train_samples_per_second': 1.807, 'train_steps_per_second': 0.235, 'total_flos': 4111110240000.0, 'train_loss': 0.7346512574415941, 'epoch': 5.0})

#### evaluate the model on test dataset

In [48]:
test_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=16)
test_dataset = CustomDataset(test_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
        0, 0, 1, 0])


In [49]:
conf_matrix = confusion_matrix(labels_test, predicted_labels.numpy())
conf_matrix

array([[24, 26],
       [28, 22]], dtype=int64)

In [50]:
accuracy = accuracy_score(labels_test, predicted_labels.numpy())
print(f'{accuracy*100} %')

46.0 %


### Example 4: 100 samples from IMDB (update the whole weights), each sample just 32 tokens

In [52]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=32)
train_dataset = CustomDataset(train_encodings, labels)

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=5,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
5,0.6715,0.789865
10,0.8584,0.781243
15,0.7353,0.765248
20,0.7356,0.743247
25,0.8111,0.714885
30,0.7241,0.683549
35,0.6723,0.659176
40,0.6242,0.63992
45,0.6673,0.614588
50,0.6373,0.594895


TrainOutput(global_step=65, training_loss=0.6906496598170354, metrics={'train_runtime': 445.457, 'train_samples_per_second': 1.122, 'train_steps_per_second': 0.146, 'total_flos': 8222220480000.0, 'train_loss': 0.6906496598170354, 'epoch': 5.0})

In [None]:
#### evaluate the model on test dataset

In [53]:
test_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=32)
test_dataset = CustomDataset(test_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
        0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
        1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1,
        0, 1, 0, 0])


In [54]:
conf_matrix = confusion_matrix(labels_test, predicted_labels.numpy())
conf_matrix

array([[32, 18],
       [31, 19]], dtype=int64)

In [55]:
accuracy = accuracy_score(labels_test, predicted_labels.numpy())
print(f'{accuracy*100} %')

51.0 %


### Example 5: 300 samples from IMDB (update the whole weights), each sample just 64 tokens

In [57]:
labels = train_labels[500:650]+train_labels[15000:15150]
texts = train_texts[500:650]+ train_texts[15000:15150]

In [58]:
len(labels)

300

In [59]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=64)
train_dataset = CustomDataset(train_encodings, labels)

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=20,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
10,0.7446,0.776092
20,0.7511,0.741113
30,0.6891,0.703499
40,0.6976,0.667485
50,0.6676,0.649557
60,0.6945,0.627668
70,0.6642,0.602618
80,0.6336,0.576123
90,0.6141,0.53776
100,0.5494,0.476585


TrainOutput(global_step=760, training_loss=0.13700935268425382, metrics={'train_runtime': 11355.7565, 'train_samples_per_second': 0.528, 'train_steps_per_second': 0.067, 'total_flos': 197333291520000.0, 'train_loss': 0.13700935268425382, 'epoch': 20.0})

#### evaluate the model on test dataset

In [61]:
test_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=64)
test_dataset = CustomDataset(test_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
        0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,
        1, 1, 1, 1])


In [62]:
conf_matrix = confusion_matrix(labels_test, predicted_labels.numpy())
conf_matrix

array([[34, 16],
       [18, 32]], dtype=int64)

In [63]:
accuracy = accuracy_score(labels_test, predicted_labels.numpy())
print(f'{accuracy*100} %')

66.0 %


### Example 6: 300 samples from IMDB (update the last layer weights), each sample just 64 tokens

In [65]:
labels = train_labels[500:650]+train_labels[15000:15150]
texts = train_texts[500:650]+ train_texts[15000:15150]

In [66]:
# Load the tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


# Tokenize the data
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=64)
train_dataset = CustomDataset(train_encodings, labels)

# Load the pre-trained model
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
# Freeze the BERT model parameters
for param in model.bert.parameters():
    param.requires_grad = False


# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=20,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="steps",  # This might still throw a warning, but you can try 'eval_strategy' instead
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
10,0.7046,0.703419
20,0.7121,0.703203
30,0.7248,0.702769
40,0.6683,0.702563
50,0.7091,0.70225
60,0.7017,0.701815
70,0.7069,0.701335
80,0.6854,0.701067
90,0.713,0.700565
100,0.701,0.699614


TrainOutput(global_step=760, training_loss=0.6956515613355134, metrics={'train_runtime': 8097.5411, 'train_samples_per_second': 0.741, 'train_steps_per_second': 0.094, 'total_flos': 197333291520000.0, 'train_loss': 0.6956515613355134, 'epoch': 20.0})

#### evaluate the model on test dataset

In [67]:
test_encodings = tokenizer(texts_test, truncation=True, padding=True, max_length=64)
test_dataset = CustomDataset(test_encodings, labels_test)  # Use the same data for validation for this example
predictions = trainer.predict(test_dataset)

# Convert predictions to PyTorch tensor
predictions_tensor = torch.tensor(predictions.predictions)

# Get the predicted labels
predicted_labels = torch.argmax(predictions_tensor, dim=1)
print(predicted_labels)

tensor([1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
        0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1])


In [68]:
conf_matrix = confusion_matrix(labels_test, predicted_labels.numpy())
conf_matrix

array([[22, 28],
       [12, 38]], dtype=int64)

In [69]:
accuracy = accuracy_score(labels_test, predicted_labels.numpy())
print(f'{accuracy*100} %')

60.0 %


# Conclusion:

In [3]:
import pandas as pd

# Create a dictionary with the data
data = {
    "Update Weights": ["All layer", "All layer", "All layer", "All layer", "All layer", "Last layer"],
    "Dataset": ["Random", "IMDB", "IMDB", "IMDB", "IMDB", "IMDB"],
    "Number of training sample": [5, 100, 100, 100, 300, 300],
    "Number of token": [128, 4, 16, 32, 64, 64],
    "Epoch": [50, 5, 5, 5, 20, 20],
    "Training time (s)": [97.66, 147.75, 276.64, 445.45, 11355.75, 8097.54],
    "Number of test samples": [3, 100, 100, 100, 300, 300],
    "Accuracy": ["66%", "47%", "46%", "51%", "66%", "60%"]
}

# Create a DataFrame from the dictionary
result = pd.DataFrame(data)
result


Unnamed: 0,Update Weights,Dataset,Number of training sample,Number of token,Epoch,Training time (s),Number of test samples,Accuracy
0,All layer,Random,5,128,50,97.66,3,66%
1,All layer,IMDB,100,4,5,147.75,100,47%
2,All layer,IMDB,100,16,5,276.64,100,46%
3,All layer,IMDB,100,32,5,445.45,100,51%
4,All layer,IMDB,300,64,20,11355.75,300,66%
5,Last layer,IMDB,300,64,20,8097.54,300,60%
