In [14]:
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
from retnet.modeling_retnet import RetNetForSequenceClassification
from retnet.configuration_retnet import load_config_from_json
from transformers import AutoTokenizer

----

In [15]:
model = RetNetForSequenceClassification.from_pretrained("./model_store/model_small_classifier")

In [18]:
tokenizer = AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

In [19]:
import re

def transform(sentence):
        return re.sub('[^A-Za-z0-9.,;!?]+', ' ', sentence) + tokenizer.eos_token

def tokenize_sentence(example):
        example = transform(example)
        input_ids = tokenizer(example,
                              truncation=True,
                              padding='max_length',
                              max_length=48,
                              return_tensors='pt')
        return input_ids

In [20]:
sentence_stock = ["It's great ! Wonderfull !", \
                  "It's very bad...", \
                    "I hate this film..." \
                    "I'm very hungry with this play... All actors has playing very bad...",
                    "Sometimes, i'm really wonder if i don't spend my time with productions built from this production house",
                    "I had a very good feeling to come in this place...",
                    "All the protagonist are very realistic, the plot is captivating",
                    "Don't care about this... be happy and that's all !",
                    "Alice was very busy... She should take better care of herself"]

In [21]:
for sentence in sentence_stock:
    print(f"--- \n {sentence}")
    input = tokenize_sentence(sentence)
    print(input)
    class_predicted = model(input["input_ids"], input["attention_mask"]).logits
    print(f"=> {class_predicted.argmax()} -------------------------------------------- {list(class_predicted)}")

--- 
 It's great ! Wonderfull !
{'input_ids': tensor([[ 1026,   264,  1049,  5145, 12902, 12853,  5145, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])}
=> 1 -------------------------------------------- [tensor([-0.9782,  1.2029], grad_fn=<UnbindBackward0>)]
--- 
 It's very bad...
{'input_ids': tensor([[ 1026,   264,   845,  2089,   986, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
       

In [47]:
tokenizer.encode(['It ist great !', "It is bad..."])

[1026, 318, 83, 1049, 5145, 1026, 318, 2089, 986]

In [46]:
context_inputs = tokenizer(, return_tensors='pt')

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [44]:
context_inputs 

{'input_ids': tensor([[  40,  836,  470, 1833,  986]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

In [45]:
model(context_inputs["input_ids"])

SequenceClassifierOutputWithPast(loss=None, logits=tensor([[ 0.4269, -0.0693]], grad_fn=<IndexBackward0>), past_key_values=({'prev_key_value': tensor([[[[-0.3554,  0.1702, -0.1555,  ...,  0.0744, -0.0582,  0.0994],
          [-0.1459,  0.0344,  0.0225,  ...,  0.0431, -0.0960, -0.0581],
          [ 0.1198,  0.0479,  0.0368,  ..., -0.0347,  0.3056,  0.0968],
          ...,
          [-0.2426,  0.1347,  0.0058,  ...,  0.0787,  0.0954,  0.0242],
          [-0.0512, -0.0976, -0.0571,  ..., -0.1208,  0.0836,  0.0258],
          [-0.0028, -0.0549,  0.0241,  ...,  0.0038, -0.0247, -0.0483]],

         [[ 0.0044, -0.0332, -0.3063,  ...,  0.2979, -0.4021,  0.0009],
          [ 0.0870, -0.1101, -0.4006,  ...,  0.2710, -0.3908, -0.0211],
          [-0.1471,  0.0977,  0.0821,  ...,  0.1283, -0.0611, -0.1471],
          ...,
          [ 0.1809, -0.0902, -0.1389,  ...,  0.0034, -0.0636,  0.2156],
          [ 0.2638, -0.1396, -0.0894,  ..., -0.0811,  0.0369,  0.1392],
          [-0.1615,  0.1795,  0.2

In [4]:
# parallel forward
# our custom generate function
generated = model.custom_generate(context_inputs['input_ids'], parallel_compute_prompt=True, max_new_tokens=200)

In [5]:
generated = model.generate(**context_inputs, max_new_tokens=20)

In [6]:
tokenizer.batch_decode(generated)

['It was very interesting but ick ernest ichi ichi the killer ivan character accepts the news of his illness']

In [9]:

context_sentence = ["The best trip I've ever made is",\
                    "This film is very disappointing because",\
                    "Do you want we go to theater this nigth ? I'm very impatient to",\
                     "How do you feel about things in general ?",\
                    "Explain me what you're talking about..."]

In [10]:
for sentence in context_sentence:
    print(f"--- Context --- \n {sentence}")
    context_inputs = tokenizer(sentence, return_tensors="pt")
    generated = model.custom_generate(context_inputs['input_ids'], parallel_compute_prompt=True, max_new_tokens=20)
    print(f"---- Response ---- \n {tokenizer.batch_decode(generated)}")
    print("----")

--- Context --- 
 The best trip I've ever made is
---- Response ---- 
 ["The best trip I've ever made is going to be something really good ichi the killer. ivan and bale reduced mainly to batting"]
----
--- Context --- 
 This film is very disappointing because
---- Response ---- 
 ['This film is very disappointing because of its many excesses. ian holm ian holm as the aged napoleon ']
----
--- Context --- 
 Do you want we go to theater this nigth ? I'm very impatient to
---- Response ---- 
 ["Do you want we go to theater this nigth? I'm very impatient to be fondly remembered in the endlessly challenging maze of moviegoing. ian holm as the mother"]
----
--- Context --- 
 How do you feel about things in general ?
---- Response ---- 
 ['How do you feel about things in general? ivan ivan is a prince of a fellow iced with this one to kill a the world']
----
--- Context --- 
 Explain me what you're talking about...
---- Response ---- 
 ["Explain me what you're talking about... a movie that, 

---