## This notebook contains a demo of the fine-tuned Analyst Tone model. yya518 fine-tuned the FinBERT model on 10,000 manually annotated analyst statements. I use this script to infer sentiment on my own customized dataset.

In [None]:
# Download the pre-trained and fine-tuned weights, then unzip them into the working directory:
# https://gohkust-my.sharepoint.com/:u:/g/personal/imyiyang_ust_hk/EQJGiEOkhIlBqlW63TbKA3gBCYgDDcHlBCB7VTXIUMmyiA

In [6]:
import os
import copy
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM, BertConfig
from bertModel import BertClassification, dense_opt
from datasets import text_dataset, financialPhraseBankDataset
import argparse
from sklearn.metrics import f1_score


In [14]:
# --- Configuration -----------------------------------------------------------
# Mapping from the classifier's output index to a human-readable sentiment.
labels = {0: 'neutral', 1: 'positive', 2: 'negative'}
num_labels = len(labels)

vocab = "finance-uncased"
#vocab_path = "/home/root/new_bert_vocab_uncased_8000"
vocab_path = 'analyst_tone/vocab'
pretrained_weights_path = "analyst_tone/pretrained_weights" # this is pre-trained FinBERT weights
fine_tuned_weight_path = "analyst_tone/fine_tuned.pth"      # this is fine-tuned FinBERT weights

# Every input is truncated/padded to exactly this many tokens.
max_seq_length = 256

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Build the classifier from the pre-trained FinBERT weights; the fine-tuned
# weights are loaded into it in a later cell.
model = BertClassification(
    weight_path=pretrained_weights_path,
    num_labels=num_labels,
    vocab=vocab,
)

  nn.init.xavier_normal(self.classifier.weight)


In [16]:
# Load the fine-tuned weights directly onto the configured device instead of a
# hard-coded 'cuda:0', so the checkpoint location always agrees with `device`.
# NOTE(review): torch.load unpickles the file; only load checkpoints obtained
# from a trusted source.
fine_tuned_state = torch.load(fine_tuned_weight_path, map_location=device)
model.load_state_dict(fine_tuned_state)
model.to(device)

BertClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30873, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )


# 0 is neutral, 1 is positive, and 2 is negative 

In [35]:
# Example inputs, one string per entry. The first and last entries appear to be
# truncated fragments ("le Reports ...", "... needs i"); they are kept verbatim.
sentences = [
    'le Reports Second Quarter ResultsTotal Revenue Grows, Services Revenue Reaches New All-Time High of $13.3 BillionCUPERTINO, California — April 30, 2020 — Apple® today announced financial results for its fiscal 2020 second quarter ended March 28, 2020.',
    'The Company posted quarterly revenue of $58.3 billion, an increase of 1 percent from the year-ago quarter, and quarterly earnings per diluted share of $2.55, up 4 percent.',
    'International sales accounted for 62 percent of the quarter’s revenue.“Despite COVID-19’s unprecedented global impact, we’re proud to report that Apple grew for the quarter, driven by an all-time record in Services and a quarterly record for Wearables,” said Tim Cook, Apple’s CEO.',
    '“In this difficult environment, our users are depending on Apple products in renewed ways to stay connected, informed, creative, and productive.',
    'We feel motivated and inspired to not only keep meeting these needs i',
]

In [36]:
# Tokenizer built from the vocabulary file at `vocab_path`, with lower-casing
# and basic tokenization both switched on.
tokenizer = BertTokenizer(
    vocab_file=vocab_path,
    do_lower_case=True,
    do_basic_tokenize=True,
)

In [37]:
# Run the classifier on each sentence, one at a time, and print the predicted
# sentiment label.
model.eval()
for sent in sentences:
    # Tokenize and truncate to the fixed sequence length.
    tokenized_sent = tokenizer.tokenize(sent)[:max_seq_length]
    ids_review = tokenizer.convert_tokens_to_ids(tokenized_sent)

    # Right-pad with zeros up to max_seq_length; the attention mask is 1 for
    # real tokens and 0 for padding.
    pad_len = max_seq_length - len(ids_review)
    mask_input = [1] * len(ids_review) + [0] * pad_len
    ids_review = ids_review + [0] * pad_len
    input_type = [0] * max_seq_length

    # Wrap each list in an outer list to get a batch of one with shape
    # (1, max_seq_length). This follows max_seq_length rather than a
    # hard-coded 256, so changing the config cannot silently reshape a single
    # sentence into several rows.
    input_ids = torch.tensor([ids_review], device=device)
    attention_mask = torch.tensor([mask_input], device=device)
    token_type_ids = torch.tensor([input_type], device=device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(input_ids, token_type_ids, attention_mask)
        outputs = F.softmax(outputs, dim=1)
        # Argmax along the class dimension of the single-row batch.
        predicted = torch.argmax(outputs, dim=1).item()
        print(sent, '\nFinBERT predicted sentiment: ', labels[predicted], '\n')
    

le Reports Second Quarter ResultsTotal Revenue Grows, Services Revenue Reaches New All-Time High of $13.3 BillionCUPERTINO, California — April 30, 2020 — Apple® today announced financial results for its fiscal 2020 second quarter ended March 28, 2020. 
FinBERT predicted sentiment:  positive 

The Company posted quarterly revenue of $58.3 billion, an increase of 1 percent from the year-ago quarter, and quarterly earnings per diluted share of $2.55, up 4 percent. 
FinBERT predicted sentiment:  positive 

International sales accounted for 62 percent of the quarter’s revenue.“Despite COVID-19’s unprecedented global impact, we’re proud to report that Apple grew for the quarter, driven by an all-time record in Services and a quarterly record for Wearables,” said Tim Cook, Apple’s CEO. 
FinBERT predicted sentiment:  positive 

“In this difficult environment, our users are depending on Apple products in renewed ways to stay connected, informed, creative, and productive. 
FinBERT predicted sent