## Test loading and using models

### It works for all models now!

### Imports

In [10]:
import operator # for the SpaCy model
from typing import Dict, List, Union

import spacy # SpaCy model
from danlp.models import load_bert_tone_model # BERT Tone
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline # for the XLM Roberta model and the SENDA model

### SpaCy model

In [2]:
# # DOES NOT WORK !!!

# # load the model
# from danlp.models import load_spacy_model # SpaCy model (doesn't work)
# nlp = load_spacy_model(textcat='sentiment') # if you get an error saying da.vectors was not found, try setting vectorError=True - it is a temporary fix

# # I cannot figure out how to fix this error
# # it has been an open issue for over a year, see: https://github.com/alexandrainst/danlp/issues/169
# # generally, it seems like DaNLP isn't updated very regularly, if at all

**I couldn't get the model to work through DaNLP, so I used the version on Hugging Face instead. I hope it is the same model, but I'm not 100% sure.**

Installation guide from huggingface (https://huggingface.co/mirfan899/da_spacy_sentiment):

```
!pip install https://huggingface.co/mirfan899/da_spacy_sentiment/resolve/main/da_spacy_sentiment-any-py3-none-any.whl

# Using spacy.load().
import spacy
nlp = spacy.load("da_spacy_sentiment")

# Importing as module.
import da_spacy_sentiment
nlp = da_spacy_sentiment.load()
```

In [3]:
# Load the spaCy sentiment pipeline by its installed package name.
# This needs the `da_spacy_sentiment` wheel from the installation guide above.
# NOTE(review): not confirmed to be identical to the DaNLP spaCy sentiment model.
spacy_clf = spacy.load("da_spacy_sentiment")

In [4]:
# Smoke test: run the spaCy classifier on three sentences and print its output.
# doc.cats is the dict of predicted probas (keys = "neutral", "negative", and "positive")

test_sentences = [
    "Denne dame er soldat.",
    "Denne herre er soldat.",
    "Dette interkønnede individ er soldat.",
]

for s in test_sentences:
    probas = spacy_clf(s).cats
    pos = probas["positive"]
    neu = probas["neutral"]
    neg = probas["negative"]

    # The predicted class is the key with the highest probability.
    best_label = max(probas, key=probas.get)

    print(f'Sentence     : "{s}"')
    print("Prediction   :", best_label)
    # One line per class, as a percentage right-aligned to five characters.
    for caption, proba in (("  •  Positive:", pos), ("  •  Neutral :", neu), ("  •  Negative:", neg)):
        print(caption, str(round(proba * 100, 2)).rjust(5), "%")
    print("-" * 50)

Sentence     : "Denne dame er soldat."
Prediction   : neutral
  •  Positive:  1.52 %
  •  Neutral : 94.09 %
  •  Negative:  4.38 %
--------------------------------------------------
Sentence     : "Denne herre er soldat."
Prediction   : neutral
  •  Positive:  3.61 %
  •  Neutral : 93.38 %
  •  Negative:   3.0 %
--------------------------------------------------
Sentence     : "Dette interkønnede individ er soldat."
Prediction   : negative
  •  Positive:  13.5 %
  •  Neutral :  23.1 %
  •  Negative:  63.4 %
--------------------------------------------------


### BERT Tone

In [5]:
# Load the BERT Tone model through DaNLP's loader.
# NOTE(review): loading prints a `num_labels=3` / id-to-label-map warning (see the output below).
# It looks harmless, since the polarity probabilities printed later still cover three classes — confirm.
bert_clf = load_bert_tone_model()

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.


In [6]:
# Smoke test: run BERT Tone on three sentences and print its labels and polarity probabilities.

test_sentences = ["Denne dame er soldat.", "Denne herre er soldat.", "Dette interkønnede individ er soldat."]

for s in test_sentences:
    # The first element of predict_proba() is unpacked as the (positive, neutral, negative) probabilities.
    pos, neu, neg = bert_clf.predict_proba(s)[0]
    # Call predict() once and reuse the result: it returns both the "analytic" and the
    # "polarity" label, so a second call would only repeat the same inference.
    prediction = bert_clf.predict(s)
    print(f'Sentence     : "{s}"')
    print("Analytic     :", prediction["analytic"])
    print("Prediction   :", prediction["polarity"])
    print("  •  Positive:", str(round(pos*100,2)).rjust(5), "%")
    print("  •  Neutral :", str(round(neu*100,2)).rjust(5), "%")
    print("  •  Negative:", str(round(neg*100,2)).rjust(5), "%")
    print("-"*50)

Sentence     : "Denne dame er soldat."
Analytic     : objective
Prediction   : neutral
  •  Positive:  3.66 %
  •  Neutral : 85.49 %
  •  Negative: 10.85 %
--------------------------------------------------
Sentence     : "Denne herre er soldat."
Analytic     : objective
Prediction   : neutral
  •  Positive:  0.21 %
  •  Neutral : 99.01 %
  •  Negative:  0.78 %
--------------------------------------------------
Sentence     : "Dette interkønnede individ er soldat."
Analytic     : subjective
Prediction   : negative
  •  Positive:  7.61 %
  •  Neutral :  3.97 %
  •  Negative: 88.42 %
--------------------------------------------------


### XLM Roberta

In [7]:
def convert_to_label_score_dict(top_k_list: List[Dict[str, Union[str, float]]]) -> Dict[str, float]:
    """Flatten a list of per-label results into a single ``{label: score}`` dictionary.

    Args:
        top_k_list (List[Dict[str, Union[str, float]]]): one dictionary per label,
            each holding a ``"label"`` string and a numeric ``"score"``.

    Returns:
        Dict[str, float]: the scores keyed by lower-cased label. If two entries
            share a label (ignoring case), the later entry wins. An empty input
            gives an empty dictionary.

    Raises:
        KeyError: if an entry lacks the ``"label"`` or ``"score"`` key.
    """
    # Labels are lower-cased so callers can look scores up without caring about the model's casing.
    return {entry["label"].lower(): entry["score"] for entry in top_k_list}

# example (scores are from the sentence "this is a lovely message")
topk = [{'label': 'Positive', 'score': 0.9918450713157654},
        {'label': 'Neutral', 'score': 0.0065132384188473225},
        {'label': 'Negative', 'score': 0.0016416909638792276}]
label_score_d = convert_to_label_score_dict(topk)
# bare last expression so the notebook displays the resulting dictionary
label_score_d

{'positive': 0.9918450713157654,
 'neutral': 0.0065132384188473225,
 'negative': 0.0016416909638792276}

In [8]:
# Load the XLM-RoBERTa sentiment model as a text-classification pipeline.
# The same model identifier is passed for both the model and the tokenizer.
model_path = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
roberta_clf = pipeline("text-classification", model=model_path, tokenizer=model_path)

    PyTorch 2.1.2+cu121 with CUDA 1201 (you have 2.1.2+cpu)
    Python  3.9.13 (you have 3.9.18)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


In [9]:
# Smoke test: run the XLM-RoBERTa pipeline on three sentences and print its output.
test_sentences = [
    "Denne dame er soldat.",
    "Denne herre er soldat.",
    "Dette interkønnede individ er soldat.",
]

for s in test_sentences:
    # The pipeline result is a list of label/score dicts; convert it to {label: score}.
    probas_list = roberta_clf(s, top_k=None)
    probas = convert_to_label_score_dict(probas_list)
    pos = probas["positive"]
    neu = probas["neutral"]
    neg = probas["negative"]

    # The predicted class is the label with the highest score.
    best_label = max(probas, key=probas.get)

    print(f'Sentence     : "{s}"')
    print("Prediction   :", best_label)
    # One line per class, as a percentage right-aligned to five characters.
    for caption, proba in (("  •  Positive:", pos), ("  •  Neutral :", neu), ("  •  Negative:", neg)):
        print(caption, str(round(proba * 100, 2)).rjust(5), "%")
    print("-" * 50)

Sentence     : "Denne dame er soldat."
Prediction   : neutral
  •  Positive:  1.31 %
  •  Neutral : 96.61 %
  •  Negative:  2.08 %
--------------------------------------------------
Sentence     : "Denne herre er soldat."
Prediction   : neutral
  •  Positive: 10.82 %
  •  Neutral : 84.21 %
  •  Negative:  4.97 %
--------------------------------------------------


Sentence     : "Dette interkønnede individ er soldat."
Prediction   : neutral
  •  Positive:  0.84 %
  •  Neutral : 97.57 %
  •  Negative:  1.59 %
--------------------------------------------------


### SENDA

In [11]:
# Load the SENDA tokenizer and sequence-classification model, both under the "pin/senda" identifier.
senda_tokenizer = AutoTokenizer.from_pretrained("pin/senda")
senda_clf = AutoModelForSequenceClassification.from_pretrained("pin/senda")

# Wrap the loaded model and tokenizer in a 'sentiment-analysis' pipeline so it can be called on raw text.
senda_pipeline = pipeline("sentiment-analysis", model=senda_clf, tokenizer=senda_tokenizer)

In [14]:
# Smoke test: run the SENDA pipeline on three sentences and print its output.
test_sentences = [
    "Denne dame er soldat.",
    "Denne herre er soldat.",
    "Dette interkønnede individ er soldat.",
]

for s in test_sentences:
    # The pipeline result is a list of label/score dicts; convert it to {label: score}.
    probas_list = senda_pipeline(s, top_k=None)
    probas = convert_to_label_score_dict(probas_list)
    # SENDA's labels are Danish, so the lower-cased keys are "positiv" / "negativ".
    pos = probas["positiv"]
    neu = probas["neutral"]
    neg = probas["negativ"]

    # The predicted class is the label with the highest score.
    best_label = max(probas, key=probas.get)

    print(f'Sentence     : "{s}"')
    print("Prediction   :", best_label)
    # One line per class, as a percentage right-aligned to five characters.
    for caption, proba in (("  •  Positive:", pos), ("  •  Neutral :", neu), ("  •  Negative:", neg)):
        print(caption, str(round(proba * 100, 2)).rjust(5), "%")
    print("-" * 50)

Sentence     : "Denne dame er soldat."
Prediction   : neutral
  •  Positive:  20.5 %
  •  Neutral : 53.68 %
  •  Negative: 25.83 %
--------------------------------------------------
Sentence     : "Denne herre er soldat."
Prediction   : neutral
  •  Positive: 18.73 %
  •  Neutral : 62.39 %
  •  Negative: 18.88 %
--------------------------------------------------
Sentence     : "Dette interkønnede individ er soldat."
Prediction   : neutral
  •  Positive: 11.42 %
  •  Neutral : 49.41 %
  •  Negative: 39.17 %
--------------------------------------------------
