In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install -q transformers

In [None]:
pip install simpletransformers

In [None]:
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [None]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased',return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 6)
    
    def forward(self, ids, mask, token_type_ids):
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

In [None]:
PATH = '/content/drive/MyDrive/Proyek/Proyek PLN/Testing/bert.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
LABEL_COLUMNS = ['toxic',
 'severe_toxic',
 'obscene',
 'threat',
 'insult',
 'identity_hate',
 'list']

def load_model():
  model = BERTClass()
  model.to(device)
  model = torch.load(PATH)
  return model

def inference(text):
  encoding = tokenizer.encode_plus(
    text,
    add_special_tokens=True,
    max_length=512,
    return_token_type_ids=True,
    padding="max_length",
    return_attention_mask=True,
    return_tensors='pt',
  )
  ids = encoding['input_ids'].to(device, dtype = torch.long)
  mask = encoding['attention_mask'].to(device, dtype = torch.long)
  token_type_ids = encoding['token_type_ids'].to(device, dtype = torch.long)
  test_prediction = model(ids, mask,token_type_ids)
  test_prediction = torch.sigmoid(test_prediction).cpu().detach().numpy().tolist()
  predictions = []
  for result in test_prediction:
    for prediction in result:
      predictions.append(prediction)
  predictions
  for label, prediction in zip(LABEL_COLUMNS, predictions):
    print(f"{label}: {prediction}")

  


Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
text = "You are such a loser! You'll regret everything you've done to me!"
model = load_model()
inference(text)

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


toxic: 0.9212898015975952
severe_toxic: 0.0046866354532539845
obscene: 0.0902160257101059
threat: 0.002216353779658675
insult: 0.5099189281463623
identity_hate: 0.005419996101409197


## Model Kedua

In [None]:
import pandas as pd
import random, sys
import os
import re
import string
from simpletransformers.classification import ClassificationModel
from scipy.special import softmax

aspect = ['Transisi enrgi','Teknologi Informasi','Budaya Green','Pegawai','Keselarasan Strategi','Tata Kelola','Metode']

def generate_sentence_pair(text):
  sentence_pairs = []
  sentence_pair1 = []
  sentence_pair2 = []
  aspect_sentiment = []
  for i in aspect:
      pair1 = i+"-pos"
      pair2 = i+"-neg"
      sentence_pair1.append(text)
      sentence_pair1.append(pair1)
      sentence_pair2.append(text)
      sentence_pair2.append(pair2)
      sentence_pairs.append(sentence_pair1)
      sentence_pairs.append(sentence_pair2)
      aspect_sentiment.append(pair1)
      aspect_sentiment.append(pair2)
      sentence_pair1 = []
      sentence_pair2 = []
  return sentence_pairs, aspect_sentiment

def clean_text(text):
    text = text.lower()
    text = re.sub('[%s]' % re.escape(string.punctuation.replace('?', '')), '', text)
    text = re.sub('\w*\d\w*', '', text)
    text = re.sub('[‘’“”…]', '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\r', '', text)
    text = text.replace('?', ' ?')
    text = text.replace('\d+', '')
    text = re.sub('[.;:!\'?,\"()\[\]*~]', '', text)
    text = re.sub('(<br\s*/><br\s*/>)|(\-)|(\/)', '', text)
    return text

In [8]:
bert_model = ClassificationModel('bert', '/content/drive/MyDrive/Proyek/Proyek PLN/Testing/bert_model',use_cuda=True, cuda_device=0) 

In [24]:
text = "Saya bisa mengakses banyak ilmu tentang tindakan ramah lingkungan"
text = clean_text(text)
def make_prediction(text):
  sentence_pairs, aspect_sentiment = generate_sentence_pair(text)
  predictions, raw_outputs = bert_model.predict(sentence_pairs)
  test = pd.DataFrame(columns=["aspect-sentiment","label"])
  probs = softmax(raw_outputs,axis=1)
  prb = []
  for pr in probs:
      prb.append(max(pr[0],pr[1]))
  test['peluang'] = prb
  test['aspect-sentiment'] = aspect_sentiment
  test['label'] = predictions
  result = test[test['label'] == 1]
  pattern = 'pos|neg'
  results = result['aspect-sentiment'].str.contains(pattern)
  print(result)

In [25]:
make_prediction(text)

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

   aspect-sentiment  label   peluang
6       Pegawai-pos      1  0.611425
12       Metode-pos      1  0.611396
