In [1]:
# install required libraries
!pip3 install transformers                  # HuggingFace library for interacting with BERT (and multiple other models)
!pip3 install accelerate                    # fast optimization with transformers
!pip3 install datasets                      # HuggingFace library to process dataframes
!pip3 install ipywidgets
!pip3 install evaluate                      # HuggingFace library to evaluate models

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [6]:
#### import libraries

# basic libraries
import pandas as pd
import numpy as np
import torch
import random
from IPython.core.display import HTML
from scipy.special import softmax
from sklearn.preprocessing import LabelEncoder

# libraries for plots and figures
import seaborn as sns
import matplotlib.pyplot as plt

# HuggingFace relevant classes
from transformers import AutoModel, BertModel, BertForSequenceClassification, AutoTokenizer, AutoModelForSequenceClassification, pipeline, TrainingArguments, Trainer, utils
from transformers import TextClassificationPipeline
from transformers.pipelines.base import KeyDataset
from datasets import load_dataset, Dataset, DatasetDict
import evaluate

# scikit-learn relevant classes
from sklearn.model_selection import cross_val_score, train_test_split

# test GPU
print(f"GPU: {torch.cuda.is_available()}")

GPU: True


In [3]:
import pandas as pd
file_id = "1INq3wr9DuykD4iSlUCocHegOJ5qgLP8z"
df = pd.read_csv(f"https://drive.google.com/uc?export=download&id={file_id}&authuser=0&export=download", sep="\t")
print(df.shape)
print(df.sentiment.value_counts())
df.head()

(1243, 3)
sentiment
dovish     511
neutral    489
hawkish    243
Name: count, dtype: int64


Unnamed: 0,ID,text,sentiment
0,157_1,The action was taken to cushion the effects on...,dovish
1,161_2,Such trends could foster inflationary imbalanc...,hawkish
2,52_0,The Federal Open Market Committee at its meeti...,neutral
3,21_5,Although continuing favorable trends bolster l...,dovish
4,78_7,The Committee perceives that the upside and do...,neutral


In [4]:
# perform a train/test split
labels = df[["sentiment"]]
sent_idxs = list(range(0, len(labels)))
train_idxs, test_idxs = train_test_split(sent_idxs, test_size=0.1, random_state=95)
print(f" Train sentences: {len(train_idxs)}\n", f"Test sentences: {len(test_idxs)}")

 Train sentences: 1118
 Test sentences: 125


In [7]:
# format the full data
df_finetune_full = df.copy()

df_finetune_full = df_finetune_full[["text", "sentiment"]]
df_finetune_full.columns = ["text", "labels_text"]

label_encoder_full = LabelEncoder()
df_finetune_full['labels'] = label_encoder_full.fit_transform(df_finetune_full['labels_text'])
df_finetune_full.head()

Unnamed: 0,text,labels_text,labels
0,The action was taken to cushion the effects on...,dovish,0
1,Such trends could foster inflationary imbalanc...,hawkish,1
2,The Federal Open Market Committee at its meeti...,neutral,2
3,Although continuing favorable trends bolster l...,dovish,0
4,The Committee perceives that the upside and do...,neutral,2


In [8]:
# format the train data adequately
df_finetune = df.loc[train_idxs].copy()

df_finetune = df_finetune[["text", "sentiment"]]
df_finetune.columns = ["text", "labels_text"]

label_encoder = LabelEncoder()
df_finetune['labels'] = label_encoder.fit_transform(df_finetune['labels_text'])
df_finetune.head()

Unnamed: 0,text,labels_text,labels
439,"In these circumstances, when the economic situ...",neutral,2
548,The Federal Reserve will employ all available ...,dovish,0
657,"Moreover, strains in financial markets have in...",dovish,0
986,"Tight credit conditions, the ongoing housing c...",dovish,0
620,The Federal Open Market Committee at its meeti...,neutral,2


In [9]:
# format the test data adequately
df_test = df.loc[test_idxs].copy()

df_test = df_test[["text", "sentiment"]]
df_test.columns = ["text", "labels_text"]

# Transform labels into integers
df_test['labels'] = label_encoder.transform(df_test['labels_text'])
df_test.head()

Unnamed: 0,text,labels_text,labels
1089,Higher energy and other costs have the potenti...,hawkish,1
557,The Committee will continue to monitor the eco...,neutral,2
552,The Committee perceives the upside and downsid...,neutral,2
630,The Committee expects inflation to moderate la...,hawkish,1
464,Information received since the Federal Open Ma...,dovish,0


In [10]:
# transform data into Dataset class
finetune_dataset = Dataset.from_pandas(df_finetune)
test_dataset = Dataset.from_pandas(df_test)

In [11]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [12]:
# tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], max_length=512, padding="max_length", truncation=True)

tokenized_ft = finetune_dataset.map(tokenize_function, batched=True)    # batched=True is key for training
tokenized_test = test_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/1118 [00:00<?, ? examples/s]

Map:   0%|          | 0/125 [00:00<?, ? examples/s]

In [13]:
label_dict = {label: int(idx) for label, idx in zip(label_encoder.classes_, np.arange(len(label_encoder.classes_)))}
label_dict

{'dovish': 0, 'hawkish': 1, 'neutral': 2}

In [14]:
# load the model for finetuning.
# NOTE that we use a different class from the transformers library:
# AutoModel vs. AutoModelForSequenceClassification

num_labels = len(df_finetune.groupby('labels').size())
model_ft = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased",
                                                              num_labels=num_labels,
                                                              label2id=label_dict,
                                                              output_hidden_states=False)

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# define the main arguments for training
training_args = TrainingArguments("./",                               # path to save model
                                  learning_rate=5e-5,                 # we use a very small learning rate
                                  num_train_epochs=1,                 # number of iterations through the corpus
                                  per_device_train_batch_size=8,      # defined by the capacity of our GPU
                                  per_device_eval_batch_size=8,       # defined by the capacity of our GPU
                                  evaluation_strategy="no",
                                  save_strategy="no")



In [20]:
# define the set of metrics to be computed through the training process
def compute_metrics(eval_pred):
    metric1 = evaluate.load("precision")
    metric2 = evaluate.load("recall")
    metric3 = evaluate.load("f1")
    metric4 = evaluate.load("accuracy")

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    precision = metric1.compute(predictions=predictions, references=labels, average="micro")["precision"]
    recall = metric2.compute(predictions=predictions, references=labels, average="micro")["recall"]
    f1 = metric3.compute(predictions=predictions, references=labels, average="micro")["f1"]
    accuracy = metric4.compute(predictions=predictions, references=labels)["accuracy"]

    return {"precision": precision, "recall": recall,
            "f1": f1, "accuracy": accuracy}

# by default the Trainer will use MSEloss from (torch.nn) for regression and
# CrossEntropy loss for classification
trainer = Trainer(
    model=model_ft,
    args=training_args,
    train_dataset=tokenized_ft,
    eval_dataset=tokenized_test,
    compute_metrics=compute_metrics
)

In [22]:
# train model
import wandb
wandb.init(mode="disabled")
trainer.train()

Step,Training Loss


TrainOutput(global_step=140, training_loss=0.2825418199811663, metrics={'train_runtime': 100.8999, 'train_samples_per_second': 11.08, 'train_steps_per_second': 1.388, 'total_flos': 294160801019904.0, 'train_loss': 0.2825418199811663, 'epoch': 1.0})

In [23]:
# evaluate final model on the test dataset
results = trainer.predict(tokenized_test)
final_metrics = results[2]
print(final_metrics)

Downloading builder script:   0%|          | 0.00/7.56k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.38k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.79k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

{'test_loss': 0.6000231504440308, 'test_precision': 0.848, 'test_recall': 0.848, 'test_f1': 0.848, 'test_accuracy': 0.848, 'test_runtime': 6.1166, 'test_samples_per_second': 20.436, 'test_steps_per_second': 2.616}
