In [1]:
import random
import sys

import numpy as np
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

sys.path.insert(0, '../src')

from MLEC import SpanEmo, DataClass


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
seed = 42
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Currently using {}".format(device))

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's built-in ``random`` module, NumPy's legacy global
    generator and PyTorch (CPU and all CUDA devices), then switches cuDNN
    to deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python's own generator was previously left unseeded.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every visible GPU, the current one included.
    torch.cuda.manual_seed_all(seed)
    # Prefer repeatable cuDNN kernels over the fastest ones.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(seed)

# --- Tokenizer / encoder settings -------------------------------------------
MAX_LENGTH = 128      # passed as `max_length` when tokenizing the input pair
DROPOUT_RATE = 0.1    # passed to SpanEmo as `output_dropout`
TOKENIZER_NAME = "indolem/indobert-base-uncased"
ENCODER_NAME = "indolem/indobert-base-uncased"

# --- Loss weights, keyed by loss name ---------------------------------------
# SPANEMO_WEIGHTS is forwarded to SpanEmo as alpha ("LCA") and beta ("ZLPR").
SPANEMO_WEIGHTS = {
    "LCA" : 0.6,
    "ZLPR" : 0.4,
}
# Not referenced by the cells visible in this notebook.
EMOREC_WEIGHTS = {
    "LCA" : 0.9,
    "ZLPR" : 0.0,
}

# --- Label set --------------------------------------------------------------
# Emotion label words; they are added to the tokenizer vocabulary and used to
# turn predicted label positions back into names.
EMOTIONS = ["Marah","Antisipasi","Jijik","Takut","Bahagia","Sedih","Terkejut","Percaya"]
LABEL_SIZE = len(EMOTIONS)

Currently using cpu


In [3]:
# Load the pretrained tokenizer and register every emotion label as a token.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
num_added_tokens = tokenizer.add_tokens(EMOTIONS)
# Display how many tokens were actually added to the vocabulary.
num_added_tokens

8

In [4]:
# Build the SpanEmo model with the configured dropout and loss weights.
# The vocabulary size comes from the tokenizer after the emotion tokens were
# added, so it counts those extra entries as well.
spanemo = SpanEmo(
    output_dropout=DROPOUT_RATE,
    alpha=SPANEMO_WEIGHTS["LCA"],
    beta=SPANEMO_WEIGHTS["ZLPR"],
    device=device,
    encoder_name=ENCODER_NAME,
    embedding_vocab_size=len(tokenizer),
)
# The checkpoint is only used as a state dict, so restrict unpickling to
# tensors and plain containers instead of arbitrary Python objects.
state_dict = torch.load(
    "../models/spanemo.pt",
    map_location="cpu",
    weights_only=True,
)
spanemo.to(device).load_state_dict(state_dict)
# Switch to inference mode; as the last expression this also shows the model.
spanemo.eval()

SpanEmo(
  (encoder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(31931, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        

In [9]:
text = input("Enter text: ")

# First segment of the input pair: all emotion label words followed by "?".
emotions_text = " ".join(EMOTIONS) + "?"

# Encode (emotions_text, text) as a sentence pair, padded/truncated to
# MAX_LENGTH. return_tensors="pt" yields tensors that already carry a leading
# batch dimension of size 1.
inputs = tokenizer(
    emotions_text,
    text,
    add_special_tokens=True,
    max_length=MAX_LENGTH,
    padding="max_length",
    truncation=True,
    return_attention_mask=True,
    return_tensors="pt",
)

input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

# Positions 1..LABEL_SIZE of the encoded sequence, shaped (1, LABEL_SIZE).
# NOTE(review): this assumes position 0 is the leading special token and that
# every emotion word is encoded as exactly one token - confirm if EMOTIONS or
# the tokenizer changes.
label_idxs = torch.arange(1, LABEL_SIZE + 1, device=device).unsqueeze(0)

with torch.no_grad():
    print("Teks : ", text)
    _, y_pred, logits, _ = spanemo(input_ids, attention_mask, label_idxs=label_idxs)
    # np.where needs host data; convert if the model hands back a tensor.
    predicted = y_pred[0]
    if torch.is_tensor(predicted):
        predicted = predicted.detach().cpu().numpy()
    idx = np.where(predicted == 1)
    # Map the positive label positions back to their emotion names.
    print("Emotion: ", np.array(EMOTIONS)[idx])

Teks :  "Makasih banyak kak giveawaynya, semoga rezeki kakak dibalas yang lebih besar amin  ooya aku dom depok hehe"
Emotion:  ['Antisipasi' 'Bahagia']
