In [None]:
#
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from datasets import load_dataset
import torch
import torch.nn.functional as F
import re, html, unicodedata

def clean_text_bert(text):
    """Normalize raw text before it is handed to the tokenizer.

    The input is coerced to ``str`` and then, in order: HTML entities are
    unescaped, ``<...>`` tags are replaced by a space, the text is
    NFKC-normalized, whitespace runs are collapsed to one space (with the
    ends trimmed), runs of the same punctuation mark among ``, . ! ?`` are
    reduced to a single mark, and everything is lowercased.

    Returns the cleaned string.
    """
    unescaped = html.unescape(str(text))
    without_tags = re.sub(r'<.*?>', ' ', unescaped)
    normalized = unicodedata.normalize("NFKC", without_tags)
    single_spaced = re.sub(r'\s+', ' ', normalized).strip()
    # Collapse repeated punctuation, e.g. "!!!" -> "!".
    deduplicated = re.sub(r'([,.!?])\1+', r'\1', single_spaced)
    return deduplicated.lower()

model_path = "model-bert"  # directory containing the trained model
tokenizer = DistilBertTokenizer.from_pretrained(model_path)
model = DistilBertForSequenceClassification.from_pretrained(model_path)
model.eval()  # switch to inference (prediction) mode

# Load the SST-2 data from HuggingFace
dataset = load_dataset("glue", "sst2")
test_data = dataset["validation"]  # the "validation" split is the one scored below

def predict_sentiment(text):
    """Classify one piece of text with the loaded model.

    The text is cleaned with ``clean_text_bert``, tokenized (truncated to at
    most 128 tokens) and run through ``model`` with gradients disabled.

    Returns a tuple ``(label, conf, pred)``:
        label -- display string; the positive label when ``pred == 1``,
                 otherwise the negative label
        conf  -- softmax probability of the predicted class, as a float
        pred  -- predicted class index, as an int
    """
    cleaned = clean_text_bert(text)
    encoded = tokenizer(
        cleaned,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=128,
    )
    with torch.no_grad():
        logits = model(**encoded).logits
        distribution = F.softmax(logits, dim=-1)
        pred = torch.argmax(distribution, dim=1).item()
        # Single input, so row 0 holds the only probability vector.
        conf = distribution[0][pred].item()
    if pred == 1:
        label = "Tích cực 😀"
    else:
        label = "Tiêu cực 😞"
    return label, conf, pred
# Spot-check: run the model on the first `total` rows of test_data, print
# each prediction next to the true label, and report the accuracy.
total = 20  # number of samples to try
correct = 0
for i in range(total):
    sample = test_data[i]
    text = sample['sentence']
    true_label = sample['label']  # 1 = positive, 0 = negative
    label, conf, pred = predict_sentiment(text)
    print(f"{i+1}. {text}\n→ {label} (thật: {true_label}, xác suất={conf:.2f})\n")
    if pred == true_label:
        correct += 1

accuracy = correct / total
print(f"\n Accuracy trên {total} mẫu đầu tiên: {accuracy:.3f}")


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install h

1. it 's a charming and often affecting journey . 
→ Tích cực 😀 (thật: 1, xác suất=1.00)

2. unflinchingly bleak and desperate 
→ Tích cực 😀 (thật: 0, xác suất=0.98)

3. allows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker . 
→ Tích cực 😀 (thật: 1, xác suất=0.93)

4. the acting , costumes , music , cinematography and sound are all astounding given the production 's austere locales . 
→ Tích cực 😀 (thật: 1, xác suất=0.84)

5. it 's slow -- very , very slow . 
→ Tiêu cực 😞 (thật: 0, xác suất=0.55)

6. although laced with humor and a few fanciful touches , the film is a refreshingly serious look at young women . 
→ Tích cực 😀 (thật: 1, xác suất=1.00)

7. a sometimes tedious film . 
→ Tiêu cực 😞 (thật: 0, xác suất=0.84)

8. or doing last year 's taxes with your ex-wife . 
→ Tiêu cực 😞 (thật: 0, xác suất=0.90)

9. you do n't have to know about music to appreciate the film 's easygoing blend of comedy and romance . 
→ Tích cực 😀 (thật: 1, xá