In [0]:
!pip install transformers sentencepiece

Collecting transformers
  Using cached transformers-4.32.1-py3-none-any.whl (7.5 MB)
Collecting sentencepiece
  Using cached sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting pyyaml>=5.1
  Using cached PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (738 kB)
Collecting safetensors>=0.3.1
  Using cached safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting regex!=2019.12.17
  Using cached regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)
Collecting huggingface-hub<1.0,>=0.15.1
  Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Using cached tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
Collecting tqdm>=4.27
  Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)
Collecting fsspec
  Using cached fsspec-2023.9.0-py3-none-any.whl (173 kB)
In

In [0]:
from transformers import pipeline
import tensorflow as tf



In [0]:
# Hub id of the checkpoint; the same id is used for the weights and the tokenizer.
model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Build a callable sentiment classifier from that checkpoint.
sentiment_task = pipeline(
    task="sentiment-analysis",
    model=model_path,
    tokenizer=model_path,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [0]:
# Classify one sentence; the recorded result is a one-element list holding a
# {'label', 'score'} dict for the predicted class.
sentiment_task("I hate fish")

Out[4]: [{'label': 'negative', 'score': 0.9187542796134949}]

In [0]:
##################################################################################################

In [0]:
#from transformers import  TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
import numpy as np
from scipy.special import softmax

In [0]:
# Hub id of the checkpoint (plain string: there is nothing to interpolate).
MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"

tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)

# PT (PyTorch weights)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# save_pretrained(MODEL) writes into a local directory named like the hub id.
# Once that directory exists, from_pretrained(MODEL) resolves to it instead of
# the hub on the next run, so it has to contain everything loaded above.
# Saving only the model leaves it without tokenizer files and breaks the
# AutoTokenizer call on a re-run; save the tokenizer alongside the model.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

In [0]:
# Single-sentence example: tokenize, run the classifier, convert logits to probabilities.
text = ["I like dogs and cats"]
encoded_input = tokenizer(text, return_tensors='pt')
print(encoded_input)

output = model(**encoded_input)
print(output)

# Logits of the only sentence in the batch, detached from autograd so they can
# be handed to NumPy/SciPy, then normalized with softmax.
scores = softmax(output.logits[0].detach().numpy())
print(scores)

{'input_ids': tensor([[    0,    87,  1884, 10269,     7,   136,  7515,     7,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1]])}
SequenceClassifierOutput(loss=None, logits=tensor([[-1.4623,  0.1009,  0.8089]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)
[0.06466233 0.3087012  0.62663645]


In [0]:
# Print labels and scores, most probable class first
ranking = np.argsort(scores)[::-1]  # class indices ordered by descending score
for position, class_id in enumerate(ranking, start=1):
    label = config.id2label[class_id]
    probability = scores[class_id]
    print(f"{position}) {label} {np.round(float(probability), 4)}")

1) positive 0.6266
2) neutral 0.3087
3) negative 0.0647


In [0]:
#################################################################################

In [0]:
# Example inputs: the same two sentences that were scored one at a time earlier,
# kept here so they can be sent through the model as a single batch.
sen_pos, sen_neg = "I like dogs and cats", "I hate fish"

In [0]:
# Tokenize both sentences together as one batch of PyTorch tensors.
# padding=True pads the shorter sentence so both rows have equal length;
# truncation with max_length=512 caps the length of any over-long input.
tokenizer_options = dict(
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
pt_batch = tokenizer([sen_pos, sen_neg], **tokenizer_options)

In [0]:
# Inspect the batch: in the recorded output the second row of input_ids is
# filled up with id 1 and its attention_mask is 0 at those padded positions.
pt_batch

Out[14]: {'input_ids': tensor([[    0,    87,  1884, 10269,     7,   136,  7515,     7,     2],
        [    0,    87, 35463, 67155,     2,     1,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 0, 0, 0]])}

In [0]:
# Padding fills the shorter sentence with pad tokens (id 1 above); the padded positions get attention_mask 0 so the model does not attend to them.

In [0]:
# Forward pass over the whole batch; the tokenizer's tensors (input_ids,
# attention_mask) are unpacked as keyword arguments of the model call.
pt_outputs = model(**pt_batch)

In [0]:
# Inspect the raw model output: one row of three logits per input sentence.
pt_outputs

Out[17]: SequenceClassifierOutput(loss=None, logits=tensor([[-1.4623,  0.1009,  0.8089],
        [ 1.8480, -0.9472, -1.7520]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)

In [0]:
from torch import nn

# Normalize each row of logits into class probabilities; dim=-1 applies the
# softmax across the class axis, independently for every sentence.
batch_logits = pt_outputs.logits
pt_pred = nn.functional.softmax(batch_logits, dim=-1)

In [0]:
# Inspect the probabilities; the recorded rows match the single-sentence
# results obtained earlier for the same two sentences.
pt_pred

Out[19]: tensor([[0.0647, 0.3087, 0.6266],
        [0.9188, 0.0561, 0.0251]], grad_fn=<SoftmaxBackward>)

In [0]:
# DeBERTa: zero-shot classification

In [0]:
pip install torch --upgrade


In [0]:
from transformers import pipeline

# Inputs for the zero-shot run: one sentence and the candidate topics to rank it against.
sequence_to_classify = "Angela Merkel is a politician in Germany and leader of the CDU"
candidate_labels = ["politics", "economy", "entertainment", "environment"]

# Zero-shot classifier backed by a DeBERTa-v3 checkpoint from the hub.
classifier = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
)

# multi_label=False: in the recorded output the scores over the candidate
# labels sum to 1 and the labels come back ordered by descending score.
output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
print(output)



Downloading (…)okenizer_config.json:   0%|          | 0.00/417 [00:00<?, ?B/s]

Downloading spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/156 [00:00<?, ?B/s]



{'sequence': 'Angela Merkel is a politician in Germany and leader of the CDU', 'labels': ['politics', 'economy', 'environment', 'entertainment'], 'scores': [0.982321560382843, 0.007280207239091396, 0.005891883280128241, 0.0045062825083732605]}


In [0]:
# The classifier result holds parallel "labels" and "scores" lists.
# Find the position of the largest score (first one wins on ties) ...
all_scores = output["scores"]
index_of_highest_score = max(range(len(all_scores)), key=all_scores.__getitem__)

# ... and look up the label stored at that same position.
label_with_highest_score = output["labels"][index_of_highest_score]

# Print the label with the highest score
print("Label with the Highest Score:", label_with_highest_score)

Label with the Highest Score: politics
