In [2]:
import copy

from transformers import pipeline

In [54]:
# Build a sentiment-analysis pipeline from the `lvwerra/distilbert-imdb` checkpoint
# and score a single review. The call returns a list with one
# {'label': ..., 'score': ...} dict per input text.
classifier = pipeline(model="lvwerra/distilbert-imdb", task="sentiment-analysis")
classifier("I have Never forgot this movie. All these years and it has remained in my life.")

[{'label': 'POSITIVE', 'score': 0.9885864853858948}]

In [60]:
classifier.tokenizer("I have Never forgot this movie. All these years and it has remained in my")

{'input_ids': [101, 1045, 2031, 2196, 9471, 2023, 3185, 1012, 2035, 2122, 2086, 1998, 2009, 2038, 2815, 1999, 2026, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [62]:
classifier

ValueError: You need to specify either `text` or `text_target`.

In [95]:
# NOTE(review): this rebuilds the pipeline with exactly the same arguments as the
# first `classifier = pipeline(...)` cell of the notebook; when cells are run top to
# bottom the existing `classifier` object could be reused instead of reloading.
classifier = pipeline(model="lvwerra/distilbert-imdb", task="sentiment-analysis")
# Score a second review with the same model.
classifier("I still remember watching Satya for the first time. I was completely blown away.")

[{'label': 'POSITIVE', 'score': 0.8662851452827454}]

In [96]:
classifier("I rented this movie to get an easy, entertained view of the history of Texas. I got a headache instead.")

[{'label': 'NEGATIVE', 'score': 0.9796644449234009}]

In [17]:
import numpy as np
import copy

class NLPGame:
    """Token-subset "game" over one sentence, scored by a sentiment classifier.

    The sentence is tokenized once. Every coalition (a selection of token
    positions) is decoded back to text and classified; the value of the
    coalition is the classifier score, signed by the predicted label.

    Attributes:
        classifier: the `lvwerra/distilbert-imdb` sentiment-analysis pipeline.
        tokenizer: the tokenizer that belongs to ``classifier``.
        tokenized_input: 1-D ``np.ndarray`` of the sentence's token ids with the
            first and last id of the tokenizer output removed.
        input_sentence: ``tokenized_input`` decoded back to text.
        n: number of tokens in ``tokenized_input`` (the number of "players").
        original_output: shallow copy of the classifier output for
            ``input_sentence``.
    """

    def __init__(self, input_sentence):
        """Load the classifier and tokenize ``input_sentence``.

        Args:
            input_sentence: the raw text whose tokens form the players of the game.
        """
        self.classifier = pipeline(model="lvwerra/distilbert-imdb", task="sentiment-analysis")
        self.tokenizer = self.classifier.tokenizer
        # [1:-1] drops the first and last ids the tokenizer wraps around the
        # sentence, so every remaining position refers to an actual token.
        self.tokenized_input = np.asarray(self.tokenizer(input_sentence)['input_ids'][1:-1])
        # Round-trip through the tokenizer so the stored sentence is exactly
        # what decoding all tokens produces.
        self.input_sentence = self.tokenizer.decode(self.tokenized_input)
        self.n = len(self.tokenized_input)
        self.original_output = copy.copy(self.classifier(self.input_sentence))

    def call(self, x):
        """Classify ``x`` and return one signed score per classifier result.

        Args:
            x: text input(s) passed unchanged to the classifier pipeline.

        Returns:
            list of floats: ``+score`` when the label is ``'POSITIVE'``,
            ``-score`` for any other label.
        """
        outputs = self.classifier(x)
        return [
            output['score'] if output['label'] == 'POSITIVE' else -output['score']
            for output in outputs
        ]

    def set_call(self, S):
        """Evaluate the game for every coalition in ``S``.

        Args:
            S: iterable of coalitions. A coalition may be a list, tuple or
               array of token positions, a boolean mask over the tokens, or a
               set of positions. Sequences keep the order in which positions
               are given; sets have no order and are used in ascending order.

        Returns:
            list of floats: the signed classifier score of each coalition's
            decoded text, in the order of ``S``.
        """
        x_inputs = []
        for coalition in S:
            if isinstance(coalition, (set, frozenset)):
                coalition = sorted(coalition)
            # Convert to an index array first: indexing with a raw tuple such
            # as (1, 9) would be read by NumPy as a multi-dimensional index
            # and raise IndexError on the 1-D token array.
            positions = np.asarray(coalition)
            if positions.dtype != bool:
                # Also gives the empty coalition an integer dtype.
                positions = positions.astype(int)
            token_subset = self.tokenized_input[positions]
            x_inputs.append(self.tokenizer.decode(token_subset))
        return self.call(x_inputs)

# Build the game for one example review; constructing NLPGame loads the
# classifier pipeline and tokenizes the sentence.
input_sent = "I still remember watching Satya for the first time. I was completely blown away."
nlp_game = NLPGame(input_sent)

In [25]:
nlp_game.set_call([[1, 9]])

still time


[0.8703174591064453]

In [8]:
list(np.asarray(nlp_game.tokenized_input)[[5,3]])

[3148, 3666]

In [101]:
nlp_game.tokenizer.decode([1045,
 2145,
 3342,
 3666,
 2938,
 3148,
 2005,
 1996,
 2034,
 1012,
 1045,
 2001,
 3294,
 10676,
 2185,
 1012])

'i still remember watching satya for the first. i was completely blown away.'

In [90]:
nlp_game.classifier("i still remember watching satya for the first time. i was completely blown away.")

[{'label': 'POSITIVE', 'score': 0.8662851452827454}]

In [92]:
# `tokenized_input` already has the first and last tokenizer ids removed in
# NLPGame.__init__, so it is decoded as-is; slicing it with [1:-1] again would
# drop the first and last real token of the sentence.
nlp_game.classifier(nlp_game.tokenizer.decode(nlp_game.tokenized_input))

[{'label': 'POSITIVE', 'score': 0.8662851452827454}]

In [81]:
# `tokenized_input` already has the first and last tokenizer ids removed in
# NLPGame.__init__, so it is decoded as-is; slicing it with [1:-1] again would
# drop the first and last real token of the sentence.
nlp_game.tokenizer.decode(nlp_game.tokenized_input)

'i still remember watching satya for the first time. i was completely blown away.'

In [4]:
# Sample reviews used by the cells below; the pos_/neg_ prefix records the
# sentiment each review is expected to have.
pos_1 = "I have Never forgot this movie. All these years and it has remained in my life."
neg_1 = "TWINS EFFECT is a poor film in so many respects. The only good element is that it doesn’t take itself seriously.."
neg_2 = "I rented this movie to get an easy, entertained view of the history of Texas. I got a headache instead."
neg_3 = "Truly appalling waste of space. Me and my friend tried to watch this film to its conclusion but had to switch it off about 30 minutes from the end."
pos_2 = "I still remember watching Satya for the first time. I was completely blown away."

In [5]:
# NOTE(review): `sentiment_analysis` is not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel. Presumably it was a
# `pipeline(...)` object created in a cell that has since been removed — TODO
# restore its definition (the recorded scores differ from those of `classifier`,
# so it does not appear to be the same model).
#
# Leave-one-word-out probe: the first call scores the full review, and each
# following call omits one further word ("have", "Never", "forgot", "this",
# "movie.", "All") to see how the score reacts.
print(sentiment_analysis("I have Never forgot this movie. All these years and it has remained in my life.")[0])
print(sentiment_analysis("I Never forgot this movie. All these years and it has remained in my life.")[0])
print(sentiment_analysis("I have forgot this movie. All these years and it has remained in my life.")[0])
print(sentiment_analysis("I have Never this movie. All these years and it has remained in my life.")[0])
print(sentiment_analysis("I have Never forgot movie. All these years and it has remained in my life.")[0])
print(sentiment_analysis("I have Never forgot this All these years and it has remained in my life.")[0])
print(sentiment_analysis("I have Never forgot this movie. these years and it has remained in my life.")[0])

{'label': 'POSITIVE', 'score': 0.9996297359466553}
{'label': 'POSITIVE', 'score': 0.9996439218521118}
{'label': 'POSITIVE', 'score': 0.9811363816261292}
{'label': 'POSITIVE', 'score': 0.9987378716468811}
{'label': 'POSITIVE', 'score': 0.9996277093887329}
{'label': 'POSITIVE', 'score': 0.9997320771217346}
{'label': 'POSITIVE', 'score': 0.99965500831604}


In [None]:
print(sentiment_analysis("I have this movie. these years and it has remained in my life.")[0])

In [7]:
print(sentiment_analysis("I have this movie. these years and it has remained in my")[0])

{'label': 'POSITIVE', 'score': 0.9951967597007751}


In [27]:
# Score each sample review and print the first (only) result dict.
# NOTE(review): `sentiment_analysis` is not defined in any visible cell — see
# TODO: restore its definition before running on a fresh kernel.
# NOTE(review): in the recorded output the last review (pos_2) is labelled
# NEGATIVE, whereas `classifier` labelled the same sentence POSITIVE earlier in
# the notebook — worth a sentence of interpretation next to the result.
print(sentiment_analysis(pos_1)[0])
print(sentiment_analysis(neg_1)[0])
print(sentiment_analysis(neg_2)[0])
print(sentiment_analysis(neg_3)[0])
print(sentiment_analysis(pos_2)[0])

{'label': 'POSITIVE', 'score': 0.9996297359466553}
{'label': 'NEGATIVE', 'score': 0.9997658133506775}
{'label': 'NEGATIVE', 'score': 0.9977344274520874}
{'label': 'NEGATIVE', 'score': 0.9998062252998352}
{'label': 'NEGATIVE', 'score': 0.9991011619567871}


In [28]:
sentiment_analysis(pos_1)

[{'label': 'POSITIVE', 'score': 0.9996297359466553}]

In [30]:
from transformers import XLNetForSequenceClassification

In [1]:
import torch
from transformers import XLNetTokenizer, XLNetForSequenceClassification

# Load the XLNet tokenizer and a sequence-classification model from the
# `xlnet-base-cased` checkpoint.
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
# NOTE(review): the load warning recorded below this cell reports the
# classification-head weights (`sequence_summary.*`, `logits_proj.*`) as newly
# initialized and recommends training the model first, so the label printed by
# this cell is not a trained sentiment prediction.
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased")

# Tokenize one review into PyTorch tensors.
inputs = tokenizer("I have Never forgot this movie. All these years and it has remained in my life.", return_tensors="pt")

# Inference only: no gradients needed.
with torch.no_grad():
    logits = model(**inputs).logits

# argmax() without a dim works on the flattened logits; the input here is a
# single sentence.
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

'LABEL_1'

In [2]:
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

In [8]:
logits

tensor([[0.0674, 0.1090]])

In [7]:
predicted_class_id = logits.argmax().item()
predicted_class_id

IndexError: index 1 is out of bounds for dimension 0 with size 1

In [10]:
# The conventional `F` alias refers to torch.nn.functional, which provides
# softmax and the other functional layer ops; torch.functional is a different
# module that does not expose them.
import torch.nn.functional as F

In [15]:
softmax = torch.nn.Softmax(dim=1)
softmax(logits)

tensor([[0.4896, 0.5104]])

In [16]:
with torch.no_grad():
    logits = model(**inputs).logits

In [19]:
# NOTE(review): this reloads the tokenizer and model that an earlier cell
# already loaded from the same checkpoint. The load warning recorded below
# again reports the classification head as newly initialized, so presumably
# the head differs from the previous load and the probabilities are not
# comparable across cells — TODO confirm and reuse the existing `model`.
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased")

# Tokenize the first negative sample review into PyTorch tensors.
inputs = tokenizer(neg_1, return_tensors="pt")

# Inference only: no gradients needed.
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = logits.argmax().item()
# `class_id` is assigned but not read again in this cell.
class_id = model.config.id2label[predicted_class_id]
# `softmax` is the torch.nn.Softmax(dim=1) instance created in an earlier cell;
# the cell displays the class probabilities.
softmax(logits)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'logits_proj.weight', 'sequence_summary.summary.bias', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

tensor([[0.4098, 0.5902]])

In [22]:
inputs = tokenizer(neg_3, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = logits.argmax().item()
class_id = model.config.id2label[predicted_class_id]
softmax(logits)

tensor([[0.3960, 0.6040]])