# Import Libraries

In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import neattext.functions as nfx
import re
import torch
import pandas as pd

# Helpers

In [2]:
# Class names for the classifier. A predicted class index is used to look up
# its name in this list, so the order of the entries matters.
labels = ['bug', 'enhancement', 'question']

# Matches a run of one or more characters outside the 7-bit ASCII range.
reg_obj = re.compile(r'[^\u0000-\u007F]+', re.UNICODE)
def is_english_text(text):
    """Return True when `text` contains only ASCII characters.

    "ASCII-only" is used here as a cheap stand-in for "English".

    The pattern is applied with ``search`` so the whole string is scanned.
    ``match`` would only test the beginning of the string and therefore
    accept text whose non-ASCII characters appear after the first position.

    Note that anything outside ASCII (accented letters, typographic quotes,
    emojis, ...) makes the text count as non-English.

    Args:
        text: The string to check.

    Returns:
        bool: False if any non-ASCII character is present, True otherwise
        (including for the empty string).
    """
    return reg_obj.search(text) is None

def neatify_text(text):
    """Normalise raw text before it is tokenized.

    The input is coerced to ``str`` and lower-cased, then passed through
    neattext's ``remove_stopwords`` followed by ``remove_emojis``.

    Args:
        text: The value to clean; non-string values are converted with ``str``.

    Returns:
        The cleaned text as returned by ``nfx.remove_emojis``.
    """
    lowered = str(text).lower()
    return nfx.remove_emojis(nfx.remove_stopwords(lowered))


# Loading Model

In [3]:
# Location handed to `from_pretrained` for both the model and the tokenizer.
MODEL_DIR = './model/distil-bert-uncased-finetuned-github-issues/'

In [4]:
# Load the sequence-classification model and its tokenizer from the same
# location (MODEL_DIR) so the two stay in sync.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Inference

In [37]:
# Change your text here
# `text` is the sample that gets passed to `inference` further down.
text = """
Keras load_image is not working if I display any image on the top of the page :(
"""

# Sample containing non-ASCII characters; not referenced by any other cell
# visible in this notebook.
text2 = """请问可以加个text field吗？
"""


In [31]:
def inference(text):
    """Classify a single issue description and print the predicted label.

    Leading/trailing spaces, newlines and tabs are stripped first. Text that
    fails `is_english_text` is not classified; a notice is printed instead.
    Otherwise the text is cleaned with `neatify_text`, tokenized, run through
    `model`, and the entry of `labels` for the highest-scoring class is
    printed.

    Args:
        text (str): The issue text to classify.

    Returns:
        None. The outcome is reported via ``print``.
    """
    # strip away any " " and \n or \t
    text = text.strip(" \n\t")

    if not is_english_text(text):
        print("Sentence have to be in english language.")
        return

    text = neatify_text(text)

    # truncation=True caps the input at the tokenizer's configured maximum
    # length, so a long issue body cannot exceed what the model accepts
    # (presumably 512 tokens for this DistilBERT checkpoint -- TODO confirm).
    tokenized_sentence = tokenizer(text, return_tensors='pt', truncation=True)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        output = model(**tokenized_sentence)

    # Softmax is monotonic, so the arg-max of the raw logits selects the same
    # class as the arg-max of the probabilities.
    preds = output.logits.argmax(dim=-1)
    predicted = labels[preds.item()]
    print(f"Predicted: {predicted}")

In [38]:
# Classify the sample `text` defined above; `inference` prints its result.
inference(text)

Predicted: bug


In [40]:
# inference on eval data
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
test_data = pd.read_pickle("./dataset/eval.pkl")

In [50]:
# modify ith sample here
i = 900

# Fetch the chosen row once, then read its text and its recorded label.
sample = test_data.iloc[i]
X = sample['descriptions']
actual = sample['labels']

print(f"Descriptions: {X}\n")
inference(X)
print(f"Actual: {actual}")

Descriptions: correct daily tweet count value twitto started, daily automated tweet reports incomplete period. tweeted value "number geolocated tweets 24 hours" incorrect. instead scheduled right away, daily tweet scheduled published 24h period.

Predicted: bug
Actual: bug
