# Analysis of FinTwitBERT-sentiment
This notebook provides an analysis of the FinTwitBERT-sentiment model, which classifies the sentiment of tweets from the financial domain. The accompanying dataset, a collection of financial tweets annotated with sentiment labels, is available on [HuggingFace](https://huggingface.co/datasets/TimKoornstra/financial-tweets-sentiment).

## Interpretability Methods
The notebook uses the following interpretability methods:
- LIME
- SHAP

## Setup
The notebook uses the following libraries, which are not listed in `requirements.txt`:
- ipywidgets
- lime
- shap

To install these libraries, run the following command:
```
pip install ipywidgets lime shap
```

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# HuggingFace Hub id shared by the tokenizer and the classification model
MODEL_NAME = "StephanAkkerman/FinTwitBERT-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Class names; the list position is used as the class id in both mappings below
labels = ["NEUTRAL", "BULLISH", "BEARISH"]
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(labels),
    id2label=dict(enumerate(labels)),
    label2id={label: idx for idx, label in enumerate(labels)},
)

pipe = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    # Use the first GPU when one is available, otherwise fall back to CPU (-1)
    # so the notebook also runs on machines without CUDA
    device=0 if torch.cuda.is_available() else -1,
    # Return the scores of every label (replaces the older return_all_scores=True)
    top_k=None,
)

In [None]:
# LIME implementation based on: https://github.com/marcotcr/lime/issues/356#issuecomment-752983134
import torch.nn.functional as F
from lime.lime_text import LimeTextExplainer
import numpy as np
# Example sentence whose prediction is explained with LIME further down in this cell
str_to_predict = "surprising increase in revenue in spite of decrease in market share"

def predictor(texts):
    """Return the pipeline scores for ``texts`` as an array LIME can consume.

    Args:
        texts: The texts to classify; passed unchanged to ``pipe``.

    Returns:
        A NumPy array with one row per prediction returned by ``pipe`` and one
        column per entry of ``labels``, in the same order as ``labels``.

    Raises:
        KeyError: If a prediction does not contain a score for one of ``labels``.
    """
    # The pipeline returns, for every text, a list of {"label", "score"} dicts
    # in its own order; index them by label so the columns can follow `labels`.
    score_by_label = (
        {entry["label"]: entry["score"] for entry in prediction}
        for prediction in pipe(texts)
    )
    return np.array(
        [[scores[label] for label in labels] for scores in score_by_label]
    )

# LIME text explainer that reports classes by the names in `labels`
lime_explainer = LimeTextExplainer(class_names=labels)

# Explain the example sentence using 2000 samples and at most 20 features
exp = lime_explainer.explain_instance(
    text_instance=str_to_predict,
    classifier_fn=predictor,
    num_features=20,
    num_samples=2000,
)
# Render the explanation inline in the notebook
exp.show_in_notebook(text=str_to_predict)

In [None]:
# SHAP implementation based on: https://github.com/PacktPublishing/Applied-Machine-Learning-Explainability-Techniques/blob/main/Chapter07/Explaining_Transformers.ipynb
import shap
def score_and_visualize(text, shap_values):
    """Print the pipeline predictions for ``text`` and draw SHAP plots.

    Args:
        text: Input passed unchanged to ``pipe``.
        shap_values: SHAP explanation object to plot.

    The text plot covers all of ``shap_values``; the bar plot is restricted to
    the first sample and to the label that ``pipe`` lists first for it
    (presumably the highest-scoring one).
    """
    prediction = pipe(text)
    # Position in `labels` of the label listed first for the first input
    first_label = prediction[0][0]["label"]
    label_index = labels.index(first_label)
    print(f"Model predictions are: {prediction}")

    shap.plots.text(shap_values)
    shap.plots.bar(shap_values[0, :, label_index])
    # Other views tried here: shap.summary_plot(shap_values.values) and
    # shap.plots.waterfall(shap_values[0, :, label_index])
    
# Single-element batch holding the sentence to explain
text = ["surprising increase in revenue in spite of decrease in market share"]

# Wrap the classification pipeline in a SHAP explainer, compute the
# explanation for the batch, then print the predictions and draw the plots
explainer = shap.Explainer(model=pipe)
shap_values = explainer(text)
score_and_visualize(text, shap_values)
