In [1]:
# Install transformers package from Huggingface
!pip install transformers



In [2]:
import torch
import pandas as pd
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from transformers import AdamW
from torch.utils.data import DataLoader
from torch.nn import functional as F

## Detect GPU

In [3]:
# Pick the CUDA device when one is available, otherwise fall back to the CPU,
# and report which one was chosen.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print('Using GPU ', torch.cuda.get_device_name(0))
else:
    print('Using CPU')

Using GPU  Tesla V100-SXM2-16GB


## Load and preprocess the data

In [4]:
# Read the labelled 10-K dataset from the working directory
df = pd.read_csv('10K_text_price_label.csv')

# Report how many rows were loaded
print("The length for 10K data: {}".format(df.shape[0]))

The length for 10K data: 944


In [5]:
# Distinct class labels present in the dataset
df['Label'].unique()

array([0, 2, 1])

In [6]:
# Preview the first rows to check the column layout
df.head()

Unnamed: 0,Company,Year,Doc,Label
0,ACC,2017,10 k 1 acc2016123110k htm 10 k document unite ...,0
1,ACC,2016,10 k 1 acc2015123110k htm 10 k 10 k unite stat...,2
2,ACC,2015,10 k 1 acc2014123110k htm 10 k acc 2014 12 31 ...,1
3,ACC,2014,10 k 1 acc2013123110k htm 10 k acc 2013 12 31 ...,2
4,ACC,2013,10 k 1 t75648_10k htm form 10 k t75648_10k htm...,0


## Split dataset

In [7]:
# Extract the document text and the class label as flat 1-D arrays
doc_data = df['Doc'].to_numpy()
labels = df['Label'].to_numpy()

# Shuffle and hold out 20% of the samples for testing; the fixed random_state
# makes the split repeatable
train_texts, test_texts, train_labels, test_labels = train_test_split(
    doc_data,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [9]:
# Show the training labels produced by the split
print(train_labels)

[2 2 0 0 2 2 2 2 0 2 2 1 2 0 0 0 0 2 2 2 2 1 0 0 2 2 2 2 2 2 2 2 2 2 0 0 2
 0 0 1]


## Tokenize the text

In [8]:
# NOTE(review): BertWordPieceTokenizer is imported here but not used in the visible cells.
from tokenizers import BertWordPieceTokenizer

# Path of the vocabulary file handed to the tokenizer
vocab = 'voc_uniq.txt'
# NOTE(review): the tokenizer is built from this local vocabulary file, while the
# model cells load the 'bert-base-uncased' checkpoint. If 'voc_uniq.txt' is not that
# checkpoint's vocabulary, the token ids will not line up with the pretrained
# embedding table — confirm.
tokenizer = BertTokenizer(vocab)

## Train the model

In [9]:
# Upper bound on the number of training epochs (the training cells may stop earlier)
EPOCHS = 20
# Mini-batch size passed to the DataLoaders as batch_size (not a number of batches)
BATCHES = 8
# Candidate learning rates
# NOTE(review): not referenced by the visible cells, which set best_lr by hand
learning_rates = [1e-3, 1e-4, 1e-5]

In [10]:
# Turn labels and encodings into a Dataset object

class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    Args:
        encodings: Mapping from field name to an indexable container with one
            row per sample. Rows may already be tensors or plain sequences.
        labels: Indexable, sized container holding one label per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_new_tensor(value):
        """Return an independent tensor holding a copy of ``value``.

        Calling ``torch.tensor`` on an existing tensor emits a copy-construct
        ``UserWarning`` for every item, so tensors are copied with
        ``clone().detach()`` instead; anything else goes through ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return a dict with one tensor per encoding field plus a 'labels' entry."""
        item = {key: self._as_new_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self._as_new_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

In [11]:
# Tokenize the training split (pad to the longest sequence, truncate at 30 tokens),
# wrap it in a Dataset and serve it in shuffled mini-batches
train_encoding = tokenizer(
    list(train_texts),
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=30,
)
train_dataset = MyDataset(train_encoding, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)

# Tokenize the test split the same way; its loader keeps the original sample order
test_encoding = tokenizer(
    list(test_texts),
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=30,
)
test_dataset = MyDataset(test_encoding, test_labels)
test_loader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)



NameError: ignored

In [13]:
# Learning rate for this run
best_lr = 1e-5

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 102.11060512065887

Current Epoch: 2
------------------------------------------
Train loss: 99.58523297309875

Current Epoch: 3
------------------------------------------
Train loss: 100.18051385879517

Current Epoch: 4
------------------------------------------
Train loss: 99.78292000293732

Current Epoch: 5
------------------------------------------
Train loss: 100.15877145528793

Current Epoch: 6
------------------------------------------
Train loss: 100.38584458827972

Current Epoch: 7
------------------------------------------
Train loss: 100.1389040350914

Stop training because of the early stop at epoch 7


## Report the evaluation results on the test set

In [15]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [16]:
# Learning rate for this run
best_lr = 1e-4

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 102.34586662054062

Current Epoch: 2
------------------------------------------
Train loss: 102.1022360920906

Current Epoch: 3
------------------------------------------
Train loss: 101.73578315973282

Current Epoch: 4
------------------------------------------
Train loss: 100.83886557817459

Current Epoch: 5
------------------------------------------
Train loss: 101.27074784040451

Current Epoch: 6
------------------------------------------
Train loss: 100.65839612483978

Current Epoch: 7
------------------------------------------
Train loss: 101.88309514522552

Current Epoch: 8
------------------------------------------
Train loss: 100.74715292453766

Current Epoch: 9
------------------------------------------
Train loss: 100.01596808433533

Current Epoch: 10
------------------------------------------
Train loss: 101.4622488617897

Current Epoch: 11
------------------------------------------
Train loss: 100.8722

In [17]:
# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [18]:
# Learning rate for this run
best_lr = 1e-6

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 99.9067815542221

Current Epoch: 2
------------------------------------------
Train loss: 100.09157240390778

Current Epoch: 3
------------------------------------------
Train loss: 100.16233003139496

Current Epoch: 4
------------------------------------------
Train loss: 99.6149154305458

Current Epoch: 5
------------------------------------------
Train loss: 99.32338970899582

Current Epoch: 6
------------------------------------------
Train loss: 100.39861404895782

Current Epoch: 7
------------------------------------------
Train loss: 99.96470230817795

Current Epoch: 8
------------------------------------------
Train loss: 100.0778598189354

Current Epoch: 9
------------------------------------------
Train loss: 99.69874411821365

Current Epoch: 10
------------------------------------------
Train loss: 99.81671100854874

Stop training because of the early stop at epoch 10


In [19]:
# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [22]:
# Learning rate for this run
best_lr = 1e-3

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 112.64965546131134

Current Epoch: 2
------------------------------------------
Train loss: 105.52300530672073

Current Epoch: 3
------------------------------------------
Train loss: 113.17386874556541

Current Epoch: 4
------------------------------------------
Train loss: 106.17731446027756

Current Epoch: 5
------------------------------------------
Train loss: 107.82152622938156

Current Epoch: 6
------------------------------------------
Train loss: 107.28663957118988

Current Epoch: 7
------------------------------------------
Train loss: 108.04945343732834

Stop training because of the early stop at epoch 7


In [23]:
# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.43386243386243384
Recall: 0.43386243386243384
F1 score: 0.43386243386243384
Accuracy: 0.43386243386243384


### Encoding length 50 (`max_length=50` tokens)

In [25]:
# Tokenize the training split (pad to the longest sequence, truncate at 50 tokens),
# wrap it in a Dataset and serve it in shuffled mini-batches
train_encoding = tokenizer(
    list(train_texts),
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=50,
)
train_dataset = MyDataset(train_encoding, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)

# Tokenize the test split the same way; its loader keeps the original sample order
test_encoding = tokenizer(
    list(test_texts),
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=50,
)
test_dataset = MyDataset(test_encoding, test_labels)
test_loader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)

In [26]:
# Learning rate for this run
best_lr = 1e-4

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 102.08098077774048

Current Epoch: 2
------------------------------------------
Train loss: 101.13652354478836

Current Epoch: 3
------------------------------------------
Train loss: 100.77958822250366

Current Epoch: 4
------------------------------------------
Train loss: 100.42237436771393

Current Epoch: 5
------------------------------------------
Train loss: 99.8835666179657

Current Epoch: 6
------------------------------------------
Train loss: 100.40240788459778

Current Epoch: 7
------------------------------------------
Train loss: 100.12757760286331

Current Epoch: 8
------------------------------------------
Train loss: 99.86036270856857

Current Epoch: 9
------------------------------------------
Train loss: 100.02865767478943

Current Epoch: 10
------------------------------------------
Train loss: 99.84298491477966

Current Epoch: 11
------------------------------------------
Train loss: 99.9773382

In [27]:
# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [28]:
# Learning rate for this run
best_lr = 1e-5

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 100.94346356391907

Current Epoch: 2
------------------------------------------
Train loss: 100.76950865983963

Current Epoch: 3
------------------------------------------
Train loss: 100.24304419755936

Current Epoch: 4
------------------------------------------
Train loss: 100.50902831554413

Current Epoch: 5
------------------------------------------
Train loss: 99.9561225771904

Current Epoch: 6
------------------------------------------
Train loss: 99.95437431335449

Current Epoch: 7
------------------------------------------
Train loss: 99.61865335702896

Current Epoch: 8
------------------------------------------
Train loss: 99.83063042163849

Current Epoch: 9
------------------------------------------
Train loss: 99.94023770093918

Current Epoch: 10
------------------------------------------
Train loss: 99.15044224262238

Current Epoch: 11
------------------------------------------
Train loss: 99.6125440597

In [29]:
# Switch to evaluation mode: the training loop leaves the model in training mode,
# and predictions made in that mode are not the model's inference-time output.
model.eval()

# Predict the test set batch by batch; no gradients are needed for evaluation.
batch_predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Only the logits are used, so no labels are passed and no loss is computed
        output = model(input_ids, attention_mask=attention_mask)

        # Predicted class = index of the largest logit in each row
        _, predicted_labels = torch.max(output.logits, 1)
        batch_predictions.append(predicted_labels)

# Concatenate once and move to the CPU for scikit-learn
y_pred = torch.cat(batch_predictions, 0)
y_pred_cpu = y_pred.cpu()

# NOTE: with average='micro' on single-label multiclass data, precision, recall and
# F1 all equal the accuracy, so the four numbers printed below are the same value.
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [30]:
# Learning rate for this run
best_lr = 1e-6

# Load the 'bert-base-uncased' checkpoint with a 3-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over all model parameters
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping bookkeeping: lowest summed epoch loss seen so far and the number
# of consecutive epochs that ended above it
min_loss = float('inf')
epoch_count = 0
early_stop = False

# Move the model to the selected device
model.to(device)

for epoch in range(EPOCHS):
    # Put the model in training mode
    model.train()

    # Running sum of the per-batch losses for this epoch
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()
        # Forward pass with labels so the returned output carries the loss
        outputs = model(
            batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device),
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    print("Current Epoch: {}".format(epoch + 1))
    print("------------------------------------------")
    print("Train loss: {}".format(train_loss))
    print()

    if train_loss > min_loss:
        # This epoch ended above the best loss so far
        if epoch_count == 4:
            # Four such epochs were already counted: stop on this one
            early_stop = True
            print("Stop training because of the early stop at epoch {}".format(epoch + 1))
            break
        epoch_count += 1
    else:
        # New best (or tie): record it and restart the counter
        min_loss = train_loss
        epoch_count = 0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 101.31260031461716

Current Epoch: 2
------------------------------------------
Train loss: 99.91304397583008

Current Epoch: 3
------------------------------------------
Train loss: 99.96644991636276

Current Epoch: 4
------------------------------------------
Train loss: 100.3437032699585

Current Epoch: 5
------------------------------------------
Train loss: 98.9865118265152

Current Epoch: 6
------------------------------------------
Train loss: 99.35534924268723

Current Epoch: 7
------------------------------------------
Train loss: 99.77695482969284

Current Epoch: 8
------------------------------------------
Train loss: 99.81931245326996

Current Epoch: 9
------------------------------------------
Train loss: 99.29109394550323

Current Epoch: 10
------------------------------------------
Train loss: 99.29612857103348

Stop training because of the early stop at epoch 10


In [31]:
# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# model is still in the training mode set by the last model.train() call and
# the test predictions are computed with dropout active.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

  if __name__ == '__main__':


Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


## Encoding length 60

In [32]:
# Tokenize the training texts, then the test texts, with identical settings:
# truncated to at most 60 tokens, padded, returned as PyTorch tensors.
train_encoding, test_encoding = (
    tokenizer(list(texts), return_tensors='pt', padding=True, truncation=True, max_length=60)
    for texts in (train_texts, test_texts)
)

# Training split: dataset object served in shuffled mini-batches
train_dataset = MyDataset(train_encoding, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)

# Test split: not shuffled, so batch order follows the order of test_labels
test_dataset = MyDataset(test_encoding, test_labels)
test_loader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)

In [33]:
# Set current learning rate here
best_lr = 1e-4

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 100.5548887848854

Current Epoch: 2
------------------------------------------
Train loss: 101.72064644098282

Current Epoch: 3
------------------------------------------
Train loss: 100.79745942354202

Current Epoch: 4
------------------------------------------
Train loss: 100.60223823785782

Current Epoch: 5
------------------------------------------
Train loss: 100.14266192913055

Current Epoch: 6
------------------------------------------
Train loss: 101.54773724079132

Current Epoch: 7
------------------------------------------
Train loss: 101.1067144870758

Current Epoch: 8
------------------------------------------
Train loss: 100.57700783014297

Current Epoch: 9
------------------------------------------
Train loss: 100.46186876296997

Current Epoch: 10
------------------------------------------
Train loss: 100.69149053096771

Stop training because of the early stop at epoch 10
Precison: 0.48677248677248675

In [34]:
# Set current learning rate here
best_lr = 1e-5

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 100.22936367988586

Current Epoch: 2
------------------------------------------
Train loss: 100.50483506917953

Current Epoch: 3
------------------------------------------
Train loss: 99.00542706251144

Current Epoch: 4
------------------------------------------
Train loss: 99.68203139305115

Current Epoch: 5
------------------------------------------
Train loss: 100.07219403982162

Current Epoch: 6
------------------------------------------
Train loss: 99.46432965993881

Current Epoch: 7
------------------------------------------
Train loss: 99.74774277210236

Current Epoch: 8
------------------------------------------
Train loss: 100.82446074485779

Stop training because of the early stop at epoch 8
Precison: 0.48677248677248675
Recall: 0.48677248677248675
F1 score: 0.48677248677248675
Accuracy: 0.48677248677248675


In [35]:
# Set current learning rate here
best_lr = 1e-6

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Current Epoch: 1
------------------------------------------
Train loss: 100.42273378372192

Current Epoch: 2
------------------------------------------
Train loss: 99.82439410686493

Current Epoch: 3
------------------------------------------
Train loss: 99.67061275243759

Current Epoch: 4
------------------------------------------
Train loss: 99.76746356487274

Current Epoch: 5
------------------------------------------
Train loss: 99.71128237247467

Current Epoch: 6
------------------------------------------
Train loss: 99.23172569274902

Current Epoch: 7
------------------------------------------
Train loss: 99.06269037723541

Current Epoch: 8
------------------------------------------
Train loss: 99.05891364812851

Current Epoch: 9
------------------------------------------
Train loss: 99.33373576402664

Current Epoch: 10
------------------------------------------
Train loss: 99.37162804603577

Current Epoch: 11
------------------------------------------
Train loss: 99.310489654541

## Encoding length 20

In [None]:
# Tokenize the training texts, then the test texts, with identical settings:
# truncated to at most 20 tokens, padded, returned as PyTorch tensors.
train_encoding, test_encoding = (
    tokenizer(list(texts), return_tensors='pt', padding=True, truncation=True, max_length=20)
    for texts in (train_texts, test_texts)
)

# Training split: dataset object served in shuffled mini-batches
train_dataset = MyDataset(train_encoding, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)

# Test split: not shuffled, so batch order follows the order of test_labels
test_dataset = MyDataset(test_encoding, test_labels)
test_loader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)

In [None]:
# Set current learning rate here
best_lr = 1e-4

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

In [None]:
# Set current learning rate here
best_lr = 1e-5

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

In [None]:
# Set current learning rate here
best_lr = 1e-6

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

## Encoding length 30

In [None]:
# Tokenize the training texts, then the test texts, with identical settings:
# truncated to at most 30 tokens, padded, returned as PyTorch tensors.
train_encoding, test_encoding = (
    tokenizer(list(texts), return_tensors='pt', padding=True, truncation=True, max_length=30)
    for texts in (train_texts, test_texts)
)

# Training split: dataset object served in shuffled mini-batches
train_dataset = MyDataset(train_encoding, train_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)

# Test split: not shuffled, so batch order follows the order of test_labels
test_dataset = MyDataset(test_encoding, test_labels)
test_loader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)

In [None]:
# Set current learning rate here
best_lr = 1e-4

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

In [None]:
# Set current learning rate here
best_lr = 1e-5

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))

In [None]:
# Set current learning rate here
best_lr = 1e-6

# Load the pretrained bert-base-uncased weights with a 3-label
# sequence-classification head; reloading here means this run does not
# inherit weights from a previous learning-rate experiment.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)

# AdamW over every model parameter at the learning rate chosen above
optimizer = AdamW(model.parameters(), lr=best_lr)

# Early-stopping state, driven by the summed training loss of each epoch
min_loss = float('inf')  # lowest epoch loss seen so far
epoch_count = 0          # consecutive epochs that did not reach a new minimum
early_stop = False       # set to True when the loop below stops early

# Put the model on device
model.to(device)

for epoch in range(EPOCHS):
  # Put the model in training mode
  model.train()

  # Sum (not mean) of the batch losses in this epoch
  train_loss = 0

  for batch in train_loader:
    optimizer.zero_grad()
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    batch_labels = batch['labels'].to(device)
    outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
    # outputs.loss is the loss the model computes from `labels`
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  print("Current Epoch: {}".format(epoch + 1))
  print("------------------------------------------")
  print("Train loss: {}".format(train_loss))
  print()

  # Check whether to stop or not.
  # An epoch whose loss is above the best loss so far counts as a miss. The
  # counter goes up to 4 on the first four consecutive misses and the fifth
  # consecutive miss ends training; an epoch that matches or beats the best
  # loss resets the counter.
  min_loss = min(train_loss, min_loss)
  if min_loss < train_loss:
    if epoch_count == 4:
      early_stop = True
      print("Stop training because of the early stop at epoch {}".format(epoch + 1))
      break
    else:
      epoch_count += 1
  else:
    # Reset the count
    epoch_count = 0

# Switch to inference mode before predicting. torch.no_grad() only disables
# gradient tracking; model.eval() is what turns dropout off. Without it the
# test predictions would be computed with dropout still active from the last
# model.train() call.
model.eval()

with torch.no_grad():
  # One tensor of predicted class ids per test batch, joined once at the end
  batch_predictions = []
  for batch in test_loader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    # Labels are not passed: only the logits are needed for prediction
    output = model(input_ids, attention_mask=attention_mask)

    # Predicted class = index of the largest logit in each row
    predicted_labels = torch.argmax(output.logits, dim=1)
    batch_predictions.append(predicted_labels)

  y_pred = torch.cat(batch_predictions, 0)

# sklearn works on CPU data, so move the predictions off the device once.
# With average='micro' on a single-label multi-class problem, precision,
# recall and F1 all equal the accuracy.
y_pred_cpu = y_pred.cpu()
precision, recall, f1, _ = precision_recall_fscore_support(test_labels, y_pred_cpu, average='micro')
acc = accuracy_score(test_labels, y_pred_cpu)
print('Precison: {}'.format(precision))
print('Recall: {}'.format(recall))
print('F1 score: {}'.format(f1))
print('Accuracy: {}'.format(acc))