In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import EarlyStoppingCallback

df = pd.read_csv('IMDB Dataset.csv')

for i in range(len(df['sentiment'])):
    if df.at[i, 'sentiment'] == 'positive':
        df.at[i, 'sentiment'] = 1
    else:
        df.at[i, 'sentiment'] = 0

groups = df.groupby(df.sentiment)
data_positive = groups.get_group(1)
data_negative = groups.get_group(0)

data_positive = data_positive.sample(frac=1.0)
data_negative = data_negative.sample(frac=1.0)

test_positive = data_positive.iloc[20000:, :]
test_negative = data_negative.iloc[20000:, :]
test_data = pd.concat([test_positive, test_negative], axis = 0, ignore_index=True).sample(frac=1)
train_positive = data_positive.iloc[:20000, :]
train_negative = data_negative.iloc[:20000, :]
train_data = pd.concat([train_positive, train_negative], axis = 0, ignore_index=True).sample(frac=1)

test_data.to_csv('test.csv', index = None)
train_data.to_csv('train.csv', index = None)

# Read data
data = pd.read_csv("train.csv")

# Define pretrained tokenizer and model
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# ----- 1. Preprocess data -----#
# Preprocess data
X = list(data["review"])
y = list(data["sentiment"])
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)
X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)

# Create torch dataset
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

train_dataset = Dataset(X_train_tokenized, y_train)
val_dataset = Dataset(X_val_tokenized, y_val)

# ----- 2. Fine-tune pretrained model -----#
# Define Trainer parameters
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred)
    precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(y_true=labels, y_pred=pred)

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Define Trainer
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",
    eval_steps=500,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    seed=0,
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Train pre-trained model
trainer.train()

# ----- 3. Predict -----#
# Load test data
test_data = pd.read_csv("test.csv")
X_test = list(test_data["review"])
X_test_tokenized = tokenizer(X_test, padding=True, truncation=True, max_length=512)

# Create torch dataset
test_dataset = Dataset(X_test_tokenized)

# Load trained model
model_path = "output/checkpoint-4000"
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Define test trainer
test_trainer = Trainer(model)

# Make prediction
raw_pred, _, _ = test_trainer.predict(test_dataset)

# Preprocess raw predictions
y_pred = np.argmax(raw_pred, axis=1)

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification

{'loss': 0.7575, 'learning_rate': 4.947916666666667e-05, 'epoch': 0.03}


                                                     
  1%|          | 500/48000 [15:43<7:16:49,  1.81it/s]Saving model checkpoint to output\checkpoint-500
Configuration saved in output\checkpoint-500\config.json


{'eval_loss': 0.7360178232192993, 'eval_accuracy': 0.492625, 'eval_precision': 1.0, 'eval_recall': 0.0007385524372230429, 'eval_f1': 0.0014760147601476016, 'eval_runtime': 673.1306, 'eval_samples_per_second': 11.885, 'eval_steps_per_second': 5.942, 'epoch': 0.03}


Model weights saved in output\checkpoint-500\pytorch_model.bin
  2%|▏         | 1000/48000 [20:40<7:38:31,  1.71it/s]   ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.7204, 'learning_rate': 4.8958333333333335e-05, 'epoch': 0.06}


                                                      
  2%|▏         | 1000/48000 [32:21<7:38:31,  1.71it/s]Saving model checkpoint to output\checkpoint-1000
Configuration saved in output\checkpoint-1000\config.json


{'eval_loss': 1.2637816667556763, 'eval_accuracy': 0.767875, 'eval_precision': 0.7849573533212717, 'eval_recall': 0.7476612506154604, 'eval_f1': 0.7658555037195816, 'eval_runtime': 701.2826, 'eval_samples_per_second': 11.408, 'eval_steps_per_second': 5.704, 'epoch': 0.06}


Model weights saved in output\checkpoint-1000\pytorch_model.bin
  3%|▎         | 1500/48000 [37:21<7:45:57,  1.66it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.754, 'learning_rate': 4.8437500000000005e-05, 'epoch': 0.09}


                                                      
  3%|▎         | 1500/48000 [49:03<7:45:57,  1.66it/s]Saving model checkpoint to output\checkpoint-1500
Configuration saved in output\checkpoint-1500\config.json


{'eval_loss': 0.6933923363685608, 'eval_accuracy': 0.50775, 'eval_precision': 0.50775, 'eval_recall': 1.0, 'eval_f1': 0.673520145912784, 'eval_runtime': 701.9344, 'eval_samples_per_second': 11.397, 'eval_steps_per_second': 5.699, 'epoch': 0.09}


Model weights saved in output\checkpoint-1500\pytorch_model.bin
  4%|▍         | 2000/48000 [54:05<7:40:27,  1.67it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.704, 'learning_rate': 4.791666666666667e-05, 'epoch': 0.12}


                                                      
  4%|▍         | 2000/48000 [1:05:52<7:40:27,  1.67it/s]Saving model checkpoint to output\checkpoint-2000
Configuration saved in output\checkpoint-2000\config.json


{'eval_loss': 0.6930730938911438, 'eval_accuracy': 0.50775, 'eval_precision': 0.50775, 'eval_recall': 1.0, 'eval_f1': 0.673520145912784, 'eval_runtime': 706.772, 'eval_samples_per_second': 11.319, 'eval_steps_per_second': 5.66, 'epoch': 0.12}


Model weights saved in output\checkpoint-2000\pytorch_model.bin
  5%|▌         | 2500/48000 [1:10:55<7:38:27,  1.65it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.6966, 'learning_rate': 4.739583333333333e-05, 'epoch': 0.16}


  _warn_prf(average, modifier, msg_start, len(result))
                                                        
  5%|▌         | 2500/48000 [1:22:44<7:38:27,  1.65it/s]Saving model checkpoint to output\checkpoint-2500
Configuration saved in output\checkpoint-2500\config.json


{'eval_loss': 0.6957231163978577, 'eval_accuracy': 0.49225, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_runtime': 708.704, 'eval_samples_per_second': 11.288, 'eval_steps_per_second': 5.644, 'epoch': 0.16}


Model weights saved in output\checkpoint-2500\pytorch_model.bin
  6%|▋         | 3000/48000 [1:27:48<7:29:11,  1.67it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.6965, 'learning_rate': 4.6875e-05, 'epoch': 0.19}


                                                        
  6%|▋         | 3000/48000 [1:39:39<7:29:11,  1.67it/s]Saving model checkpoint to output\checkpoint-3000
Configuration saved in output\checkpoint-3000\config.json


{'eval_loss': 0.6930149793624878, 'eval_accuracy': 0.50775, 'eval_precision': 0.50775, 'eval_recall': 1.0, 'eval_f1': 0.673520145912784, 'eval_runtime': 710.7202, 'eval_samples_per_second': 11.256, 'eval_steps_per_second': 5.628, 'epoch': 0.19}


Model weights saved in output\checkpoint-3000\pytorch_model.bin
  7%|▋         | 3500/48000 [1:44:44<7:25:35,  1.66it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.7147, 'learning_rate': 4.635416666666667e-05, 'epoch': 0.22}


  _warn_prf(average, modifier, msg_start, len(result))
                                                        
  7%|▋         | 3500/48000 [1:56:36<7:25:35,  1.66it/s]Saving model checkpoint to output\checkpoint-3500
Configuration saved in output\checkpoint-3500\config.json


{'eval_loss': 0.6931833028793335, 'eval_accuracy': 0.49225, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_runtime': 711.332, 'eval_samples_per_second': 11.247, 'eval_steps_per_second': 5.623, 'epoch': 0.22}


Model weights saved in output\checkpoint-3500\pytorch_model.bin
  8%|▊         | 4000/48000 [2:01:41<7:23:01,  1.66it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.7061, 'learning_rate': 4.5833333333333334e-05, 'epoch': 0.25}


  _warn_prf(average, modifier, msg_start, len(result))
                                                        
  8%|▊         | 4000/48000 [2:13:32<7:23:01,  1.66it/s]Saving model checkpoint to output\checkpoint-4000
Configuration saved in output\checkpoint-4000\config.json


{'eval_loss': 0.701452374458313, 'eval_accuracy': 0.49225, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_runtime': 710.6084, 'eval_samples_per_second': 11.258, 'eval_steps_per_second': 5.629, 'epoch': 0.25}


Model weights saved in output\checkpoint-4000\pytorch_model.bin
  9%|▉         | 4500/48000 [2:18:37<7:17:07,  1.66it/s]    ***** Running Evaluation *****
  Num examples = 8000
  Batch size = 2


{'loss': 0.6993, 'learning_rate': 4.5312500000000004e-05, 'epoch': 0.28}


  _warn_prf(average, modifier, msg_start, len(result))
                                                        
  9%|▉         | 4500/48000 [2:30:25<7:17:07,  1.66it/s]Saving model checkpoint to output\checkpoint-4500
Configuration saved in output\checkpoint-4500\config.json


{'eval_loss': 0.6986303925514221, 'eval_accuracy': 0.49225, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_runtime': 708.369, 'eval_samples_per_second': 11.294, 'eval_steps_per_second': 5.647, 'epoch': 0.28}


Model weights saved in output\checkpoint-4500\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from output\checkpoint-3000 (score: 0.6930149793624878).
  9%|▉         | 4500/48000 [2:30:30<24:14:52,  2.01s/it]


{'train_runtime': 9030.238, 'train_samples_per_second': 10.631, 'train_steps_per_second': 5.315, 'train_loss': 0.7165679050021702, 'epoch': 0.28}


loading configuration file output/checkpoint-4000\config.json
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file output/checkpoint-4000\pytorch_model.bin
All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceC