In [2]:
import json

path = "Bert_pipeline.ipynb"  # must match the name of your notebook file

# Read the notebook as plain JSON.
with open(path, "r", encoding="utf-8") as f:
    nb = json.load(f)

# Drop the "widgets" entry from the notebook-level metadata first, then from
# each cell's metadata. Containers without such an entry are left untouched.
for container in [nb, *nb.get("cells", [])]:
    container.get("metadata", {}).pop("widgets", None)

# Overwrite the same file with the cleaned notebook.
with open(path, "w", encoding="utf-8") as f:
    json.dump(nb, f, ensure_ascii=False, indent=1)

print("Done: removed metadata.widgets")


Done: removed metadata.widgets


In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Directory that both the tokenizer and the classifier are loaded from.
MODEL_DIR = "outputs/bert_best"

# Class index -> label name (index 0, 1, 2 in list order).
id2label = dict(enumerate(["hate_speech", "offensive_language", "neither"]))

# Use the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)

# Load the classifier, move it to the selected device, then switch it to
# evaluation mode before it is used for prediction.
model = BertForSequenceClassification.from_pretrained(MODEL_DIR)
model = model.to(device)
model.eval()

def predict(text, max_length=64):
    """Classify one text with the module-level ``tokenizer`` and ``model``.

    Args:
        text: The text to classify.
        max_length: Value forwarded to the tokenizer as ``max_length``;
            together with ``truncation=True`` it caps the encoded length.
            Defaults to 64, the value that used to be hard-coded.

    Returns:
        A tuple ``(pred, label, probs)`` where ``pred`` is the index of the
        highest-probability class as an ``int``, ``label`` is
        ``id2label[pred]``, and ``probs`` is a NumPy array with the softmax
        probabilities of the first row of the model's logits.

    Raises:
        KeyError: If the predicted index is not a key of ``id2label``.
    """
    enc = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    )
    # Move every encoded tensor to the device the model was placed on.
    enc = {k: v.to(device) for k, v in enc.items()}
    with torch.no_grad():  # inference only, no gradient tracking needed
        logits = model(**enc).logits
        # Softmax across dim=1 (the class scores), then keep row 0 only.
        probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
        pred = int(probs.argmax())
    return pred, id2label[pred], probs

# Example 1: run the classifier on a sentence about technology.
text = "Technology is advancing so fast these days."
pred_id, pred_label, probs = predict(text)

# One tuple per output line: a caption followed by the values printed after it.
_report = (
    ("Text:", text),
    ("Pred:", pred_id, pred_label),
    ("Probs [hate, offensive, neither]:", probs),
)
for _fields in _report:
    print(*_fields)


Text: Technology is advancing so fast these days.
Pred: 2 neither
Probs [hate, offensive, neither]: [0.00594012 0.10271147 0.8913485 ]


In [3]:
# Example 2: run the classifier on a short hostile sentence.
text = "I hate you!"
pred_id, pred_label, probs = predict(text)

# One tuple per output line: a caption followed by the values printed after it.
_report = (
    ("Text:", text),
    ("Pred:", pred_id, pred_label),
    ("Probs [hate, offensive, neither]:", probs),
)
for _fields in _report:
    print(*_fields)

Text: I hate you!
Pred: 1 offensive_language
Probs [hate, offensive, neither]: [0.44031024 0.54600185 0.01368792]


In [4]:
# Example 3: run the classifier on a conciliatory question.
text = "Can we just all get along?"
pred_id, pred_label, probs = predict(text)

# One tuple per output line: a caption followed by the values printed after it.
_report = (
    ("Text:", text),
    ("Pred:", pred_id, pred_label),
    ("Probs [hate, offensive, neither]:", probs),
)
for _fields in _report:
    print(*_fields)

Text: Can we just all get along?
Pred: 2 neither
Probs [hate, offensive, neither]: [0.05120184 0.11343684 0.83536136]
