### Pretrained FinBERT model on earnings call transcripts (Q&A ensemble)

Load data:

In [None]:
# Colab-only step: open the browser file picker and upload the transcript
# .json files into the runtime (the "Saving ..." lines below are its log).
# The preprocessing cell later reads .json files from /content.
from google.colab import files
upload = files.upload()

Saving accolade-inc-accd-q3-2021-earnings-call-transcript.json to accolade-inc-accd-q3-2021-earnings-call-transcript.json
Saving acuity-brands-inc-ayi-q1-2021-earnings-call-transc.json to acuity-brands-inc-ayi-q1-2021-earnings-call-transc.json
Saving albertsons-companies-inc-aci-q3-2020-earnings-call.json to albertsons-companies-inc-aci-q3-2020-earnings-call.json
Saving angiodynamics-inc-ango-q2-2021-earnings-call-trans.json to angiodynamics-inc-ango-q2-2021-earnings-call-trans.json
Saving aphria-inc-apha-q2-2021-earnings-call-transcript.json to aphria-inc-apha-q2-2021-earnings-call-transcript.json
Saving audiovox-voxx-q3-2021-earnings-call-transcript.json to audiovox-voxx-q3-2021-earnings-call-transcript.json
Saving azz-inc-azz-q3-2021-earnings-call-transcript.json to azz-inc-azz-q3-2021-earnings-call-transcript.json
Saving bed-bath-beyond-bbby-q3-2020-earnings-call-transcr.json to bed-bath-beyond-bbby-q3-2020-earnings-call-transcr.json
Saving blackrock-blk-q4-2020-earnings-call-trans

Imports:

In [None]:
import json
import os
from collections import Counter

import nltk.data
import numpy as np
import pandas
import torch
from sklearn.metrics import classification_report
from transformers import BertTokenizer, BertForSequenceClassification

Preprocess data:

In [None]:
directory = '/content'
# Character budget per model input. The tokenizer later truncates at 512
# tokens, so a 512-character cap should in practice stay within that limit.
MAX_CHUNK_CHARS = 512
transcript_data = []
input_data = []
input_labels = []
# NOTE(review): presumably mirrors the finbert-tone class order; it is not
# referenced by the visible cells -- confirm before relying on it.
label_map = {0:'neutral', 1:'positive', 2:'negative'}

nltk.download('punkt')
sent_tokenizer = nltk.data.load('tokenizers/punkt/PY3/english.pickle')


def _group_sentences(sentences, max_chars):
  """Pack consecutive sentences into chunks of at most `max_chars` characters.

  Sentences are joined with a single space so words at sentence boundaries do
  not fuse together. Sentences are never split: a single sentence longer than
  `max_chars` becomes its own (oversized) chunk. Empty chunks are never
  emitted, so an empty sentence list yields an empty result.

  Args:
    sentences: iterable of sentence strings, in document order.
    max_chars: maximum chunk length in characters.

  Returns:
    List of chunk strings, in document order.
  """
  grouped = []
  current = ""
  for sent in sentences:
    candidate = f"{current} {sent}" if current else sent
    if len(candidate) <= max_chars:
      current = candidate
    else:
      # Flush the chunk built so far (if any) and start a new one.
      if current:
        grouped.append(current)
      current = sent
  if current:
    grouped.append(current)
  return grouped


def _direction_label(price_before, price_day_of, price_after, volatility):
  """Classify the price move around the call as 0/1/2.

  The move is `price_after - price_before`. It counts as "no change" when its
  magnitude does not exceed `price_day_of * volatility`.

  Returns:
    0 for no change (within the volatility band), 1 for an increase,
    2 for a decrease.
  """
  price_difference = price_after - price_before
  volatility_difference = price_day_of * volatility
  if abs(price_difference) - volatility_difference <= 0:
    return 0
  return 1 if price_difference > 0 else 2


# os.listdir returns entries in arbitrary order; sorting makes the order of
# input_data / input_labels reproducible across runs.
for filename in sorted(os.listdir(directory)):
  f = os.path.join(directory, filename)
  if not (os.path.isfile(f) and f.endswith('.json')):
    continue

  # Load the transcript and release the file handle before processing it.
  with open(f, encoding='utf-8') as file:
    transcript = json.load(file)
  transcript_data.append(transcript)

  # Extract q&a answers from transcript: drop the operator and any speaker
  # whose label ends with "Analyst" (i.e. keep the answering side only).
  answers = [
    block['text']
    for block in transcript['text_blocks']
    if block['section'] == "Questions and Answers"
    and block['speaker'] != "Operator"
    and not block['speaker'].endswith("Analyst")
  ]

  # Split each answer into chunks of whole sentences; chunks never span two
  # adjacent answers.
  transcript_chunks = []
  for answer in answers:
    transcript_chunks.extend(
      _group_sentences(sent_tokenizer.tokenize(answer), MAX_CHUNK_CHARS)
    )
  input_data.append(transcript_chunks)

  # Extract stock prices and volatility, then derive the direction label.
  # NOTE(review): the price fields are indexed with [-1]; presumably each is a
  # sequence whose last element is the closing price -- confirm against the
  # JSON schema.
  input_labels.append(_direction_label(
    price_before=transcript['closing_price_day_before'][-1],
    price_day_of=transcript['closing_price_day_of'][-1],
    price_after=transcript['closing_price_day_after'][-1],
    volatility=transcript['daily_volatility'],
  ))

# Every transcript must contribute exactly one chunk list and one label.
assert len(input_data) == len(input_labels) == len(transcript_data)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# Sanity check: one direction label per transcript
# (0 = no change, 1 = increase, 2 = decrease).
print(input_labels)

[1, 0, 2, 2, 2, 1, 2, 0, 0, 2, 0, 1, 2, 2, 1, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 1, 1, 2, 0, 1, 1, 0, 1, 2, 2, 1, 1, 1, 1, 1, 0, 2, 0, 1, 2, 2, 0, 0, 2, 2, 0]


Experiment using pretrained FinBERT model:

In [None]:
# Tokenizer and classifier are both loaded from the same Hugging Face
# checkpoint, so the checkpoint id is spelled out once.
FINBERT_CHECKPOINT = 'yiyanghkust/finbert-tone'

tokenizer = BertTokenizer.from_pretrained(FINBERT_CHECKPOINT)
finbert = BertForSequenceClassification.from_pretrained(
  FINBERT_CHECKPOINT,
  num_labels=3,
)

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

In [None]:
# Chunk data to save RAM
def chunks(lst, n):
  """Lazily yield consecutive slices of `lst`, each at most `n` items long.

  The final slice may be shorter than `n`. Works on any sliceable sequence
  that supports `len()`.
  """
  yield from (lst[start:start + n] for start in range(0, len(lst), n))

In [None]:
# For every transcript, classify each text chunk with FinBERT and tally the
# predicted class indices. `counts` ends up with one Counter per transcript;
# the majority vote itself is taken in a later cell.
counts = []
batch_size = 15
for x in input_data:
  ensemble_labels = []
  for batch in chunks(x, batch_size):
    # Tokenize input data (pad to the longest chunk in the batch, truncate at
    # the 512-token limit passed below)
    inputs = tokenizer(batch, padding = True, truncation = True, max_length = 512, return_tensors='pt')

    # Run model and get outputs. This is inference only: disabling autograd
    # avoids building and retaining the gradient graph, which saves memory
    # and time.
    with torch.no_grad():
      outputs = finbert(**inputs)

    # Get output labels: index of the largest logit in each row
    ensemble_labels.extend(outputs['logits'].argmax(dim=-1).tolist())

  # Store the per-class vote tally for this transcript
  counts.append(Counter(ensemble_labels))

In [None]:
# Inspect the per-transcript tallies of predicted class indices
# (one Counter per transcript, keyed by class index).
print(counts)

[Counter({0: 32, 1: 11}), Counter({0: 41, 1: 41, 2: 6}), Counter({0: 16, 1: 5, 2: 1}), Counter({1: 13, 0: 6, 2: 1}), Counter({1: 28, 0: 19, 2: 6}), Counter({1: 22, 0: 21, 2: 3}), Counter({0: 109, 1: 44, 2: 17}), Counter({0: 36, 1: 31, 2: 3}), Counter({1: 67, 0: 36, 2: 9}), Counter({1: 32, 0: 24, 2: 3}), Counter({1: 35, 0: 27, 2: 3}), Counter({0: 44, 1: 40, 2: 10}), Counter({0: 45, 1: 11, 2: 3}), Counter({1: 34, 0: 9, 2: 2}), Counter({0: 119, 1: 25, 2: 2}), Counter({1: 29, 0: 22, 2: 5}), Counter({0: 18, 1: 17, 2: 1}), Counter({0: 23, 1: 20, 2: 5}), Counter({1: 34, 0: 25, 2: 7}), Counter({0: 7, 1: 7, 2: 2}), Counter({1: 50, 0: 43, 2: 7}), Counter({0: 23, 1: 18, 2: 7}), Counter({0: 26, 1: 19, 2: 7}), Counter({1: 39, 0: 15, 2: 4}), Counter({0: 35, 1: 22, 2: 13}), Counter({0: 19, 1: 19, 2: 2}), Counter({1: 28, 0: 18, 2: 2}), Counter({0: 27, 1: 8, 2: 4}), Counter({0: 101, 1: 55, 2: 6}), Counter({0: 28, 1: 17, 2: 2}), Counter({1: 29, 0: 13, 2: 4}), Counter({0: 47, 1: 26, 2: 5}), Counter({1: 6

In [None]:
# Majority vote: for each transcript keep the class index with the highest
# tally. On a tie, most_common keeps the label that was counted first.
output_labels = [tally.most_common(1)[0][0] for tally in counts]
print(output_labels)

[0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1]


In [None]:
# Generate and print performance metrics
target_names = ['no change', 'increase', 'decrease']
# Pin the label set to the indices matching target_names so the report keeps
# all three rows (and does not raise) even if a class is absent from both the
# true and the predicted labels.
report_labels = list(range(len(target_names)))
# zero_division=0 reports 0.0 for a class that is never predicted (the same
# value as before) without emitting an undefined-metric warning.
print(classification_report(
  input_labels,
  output_labels,
  labels=report_labels,
  target_names=target_names,
  digits=3,
  zero_division=0,
))

              precision    recall  f1-score   support

   no change      0.346     0.500     0.409        18
    increase      0.280     0.467     0.350        15
    decrease      0.000     0.000     0.000        18

    accuracy                          0.314        51
   macro avg      0.209     0.322     0.253        51
weighted avg      0.205     0.314     0.247        51



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
