In [1]:
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
)
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import torch
from tqdm import tqdm
from seqeval.metrics import classification_report
from nlp_project.models import CubeBert
from nlp_project.data import json_to_Dataset, json_to_Dataset_adv, json_to_Dataset_ensemble
from nlp_project.utils import compute_metrics, compute_ensemble_metrics

# BIO tag inventory. The position of a tag in this list is its integer class id,
# so the order must not be changed.
all_labels = [
    "B-STREET",
    "B-CITY",
    "I-DATE",
    "B-PASS",
    "I-CITY",
    "B-TIME",
    "B-EMAIL",
    "I-DRIVERLICENSE",
    "I-POSTCODE",
    "I-BOD",
    "B-USERNAME",
    "B-BOD",
    "B-COUNTRY",
    "B-SECADDRESS",
    "B-IDCARD",
    "I-SOCIALNUMBER",
    "I-PASSPORT",
    "B-IP",
    "O",
    "B-TEL",
    "B-SOCIALNUMBER",
    "I-TIME",
    "B-BUILDING",
    "B-PASSPORT",
    "I-TITLE",
    "I-SEX",
    "I-STREET",
    "B-STATE",
    "I-STATE",
    "B-TITLE",
    "B-DATE",
    "B-GEOCOORD",
    "I-IDCARD",
    "I-TEL",
    "B-POSTCODE",
    "B-DRIVERLICENSE",
    "I-GEOCOORD",
    "I-COUNTRY",
    "I-EMAIL",
    "I-PASS",
    "B-SEX",
    "I-USERNAME",
    "I-BUILDING",
    "I-IP",
    "I-SECADDRESS",
    "B-CARDISSUER",
    "I-CARDISSUER",
]

# Lookup tables between integer class ids and tag strings, in both directions.
id2label = dict(enumerate(all_labels))
label2id = {tag: idx for idx, tag in enumerate(all_labels)}
n_labels = len(id2label)

  from .autonotebook import tqdm as notebook_tqdm


In [41]:
def compute_all_metrics(model, tokenizer, dataset, batch_size=8):
    """Evaluate a token-classification model and return its aggregate metrics.

    Runs batched inference without gradients, prints a seqeval
    ``classification_report`` built from the tag names in the module-level
    ``id2label`` mapping, and returns the result of ``compute_metrics`` applied
    to the raw per-sequence id arrays.

    Args:
        model: Model whose forward pass accepts ``input_ids`` and
            ``attention_mask`` and returns an object with a ``logits`` attribute.
        tokenizer: Tokenizer given to ``DataCollatorForTokenClassification`` so
            that each batch can be padded.
        dataset: Either an object exposing ``remove_columns`` or a plain
            sequence of per-example dicts. The ``"source_text"`` and
            ``"tokens"`` entries are dropped before batching; the remaining
            entries must include ``input_ids``, ``attention_mask`` and ``labels``.
        batch_size: Number of examples per inference batch.

    Returns:
        Whatever ``compute_metrics(predictions, labels)`` returns for the
        collected per-sequence prediction and label id arrays.
    """
    # Fall back to CPU so the notebook also runs on machines without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    # Drop non-numeric columns; the collator can only pad numeric features.
    text_columns = ["source_text", "tokens"]
    if hasattr(dataset, "remove_columns"):
        dataset = dataset.remove_columns(text_columns)
    else:
        # Plain list of example dicts (e.g. a dataset materialised with list()).
        dataset = [
            {key: value for key, value in example.items() if key not in text_columns}
            for example in dataset
        ]

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        collate_fn=DataCollatorForTokenClassification(
            tokenizer, return_tensors="pt"
        ),
    )

    all_predictions = []
    # Named *_ids so the module-level ``all_labels`` tag list is not shadowed.
    all_label_ids = []
    all_predictions_text = []
    all_labels_text = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            outputs = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            )
            preds = torch.argmax(outputs.logits, dim=-1).cpu().numpy()
            labels = batch["labels"].numpy()

            # One array per sequence; its length is the padded length of the batch.
            all_predictions.extend(preds)
            all_label_ids.extend(labels)

            for pred_seq, label_seq in zip(preds, labels):
                # Positions labelled -100 are excluded from the text report.
                kept = [(p, l) for p, l in zip(pred_seq, label_seq) if l != -100]
                all_predictions_text.append([id2label[int(p)] for p, _ in kept])
                all_labels_text.append([id2label[int(l)] for _, l in kept])

    print(classification_report(all_labels_text, all_predictions_text))

    return compute_metrics(all_predictions, all_label_ids)

In [None]:
# Materialise each test split as a list: standard and adversarial ("_adv")
# variants for the DistilBERT and ALBERT data files.
d_test = list(json_to_Dataset("data/distilbert_test.json"))
d_test_adv = list(json_to_Dataset_adv("data/distilbert_test_adv.json"))
a_test = list(json_to_Dataset("data/albert_test.json"))
a_test_adv = list(json_to_Dataset_adv("data/albert_test_adv.json"))

In [20]:
def _load_checkpoint(path):
    """Return the ``(model, tokenizer)`` pair stored under ``path``."""
    return (
        AutoModelForTokenClassification.from_pretrained(path),
        AutoTokenizer.from_pretrained(path),
    )


# Checkpoints without the "_adv" suffix.
d1_model, d1_tokenizer = _load_checkpoint("models/distilbert1")
a1_model, a1_tokenizer = _load_checkpoint("models/albert1")

# "_adv" checkpoints, evaluated in the "post adv fine tuning" section below.
d1_model_adv, d1_tokenizer_adv = _load_checkpoint("models/distilbert1_adv")
a1_model_adv, a1_tokenizer_adv = _load_checkpoint("models/albert1_adv")

# Inference on standard dataset

In [None]:
# Evaluate the models/distilbert1 checkpoint on the standard DistilBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(d1_model, d1_tokenizer, d_test)
res

In [16]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Distilbert Finetuned 1")
plt.show()

In [None]:
# Evaluate the models/albert1 checkpoint on the standard ALBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(a1_model, a1_tokenizer, a_test)
res

In [None]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Albert Finetuned 1")
plt.show()

# Inference on adversarial dataset

In [None]:
# Evaluate the models/distilbert1 checkpoint on the adversarial DistilBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(d1_model, d1_tokenizer, d_test_adv)
res

In [None]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
# The title names the adversarial test set so this figure is distinguishable
# from the standard-test-set figure for the same checkpoint.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Distilbert Finetuned 1 (Adversarial Test Set)")
plt.show()

In [None]:
# Evaluate the models/albert1 checkpoint on the adversarial ALBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(a1_model, a1_tokenizer, a_test_adv)
res

In [None]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
# The title names the adversarial test set so this figure is distinguishable
# from the standard-test-set figure for the same checkpoint.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Albert Finetuned 1 (Adversarial Test Set)")
plt.show()

# Inference on adversarial dataset (after adversarial fine-tuning)

In [None]:
# Evaluate the models/distilbert1_adv checkpoint on the adversarial DistilBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(d1_model_adv, d1_tokenizer_adv, d_test_adv)
res

In [None]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Distilbert 1 Adversarial")
plt.show()

In [None]:
# Evaluate the models/albert1_adv checkpoint on the adversarial ALBERT test set.
# compute_all_metrics takes (model, tokenizer, dataset): the tokenizer is
# required so the collator can pad each batch.
res = compute_all_metrics(a1_model_adv, a1_tokenizer_adv, a_test_adv)
res

In [None]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix Albert 1 Adversarial")
plt.show()

# Ensemble

In [None]:
# Build the CubeBert ensemble from the two fine-tuned checkpoints, then restore
# its saved weights from model_state.pth.
model = CubeBert(distilbert_tuned="models/distilbert1", albert_tuned="models/albert1")
# map_location="cpu" lets the checkpoint load on machines that lack the device
# it was saved from; load_state_dict then copies the values into the model's
# own parameters.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state_dict = torch.load("model_state.pth", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()

In [10]:
# Materialise the ensemble test split as a list.
data = list(json_to_Dataset_ensemble("data/ensemble_test.json"))

In [None]:
# Evaluate the CubeBert ensemble on the ensemble test data and print the
# returned metrics; the "confusion_matrix" entry is plotted in the cells below.
res = compute_ensemble_metrics(model, data)
print(res)

In [13]:
# Natural log of the confusion matrix for plotting; the 0.01 offset keeps
# zero-valued cells finite instead of -inf.
conf = np.log(res["confusion_matrix"] + 0.01)

In [None]:
# Heatmap of the log-scaled confusion matrix computed in the previous cell.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(conf, cmap="Reds", ax=ax)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix CubeBERT")
plt.show()