In [6]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import torch
import json

# Load your fine-tuned model and tokenizer
model_path = "models/ModernBERT/best_modernbert_model"
#model_path = "models/DeBERTa/best_deberta_model_0684"  # Change this to your actual model path

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)

In [7]:
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="first")

Device set to use cuda:0


In [8]:
text = "\nThe Australian government has banned all Kaspersky Lab products and web services from its systems and devices following an analysis that claims the company poses a significant security risk to the country.\n\"After considering threat and risk analysis, I have determined that the use of Kaspersky Lab, Inc. products and web services by Australian Government entities poses an unacceptable security risk to Australian Government, networks and data, arising from threats of foreign interference, espionage and sabotage,\" justified\u00a0Stephanie Foster, Secretary of the Department of Home Affairs.\n\"I have also considered the important need for a strong policy signal to critical infrastructure and other Australian governments regarding the unacceptable security risk associated with the use of Kaspersky Lab, Inc. products and web services.\"\nAccording to the directive issued by the Department of Home Affairs, all non-corporate Commonwealth entities (subject to the Public Governance, Performance and Accountability Act 2013) must:\nA provision for exemption exists for cases where using Kaspersky products is necessary for national security or regulatory functions, including compliance and law enforcement.\nResponding to our request for a comment, a Kaspersky spokesperson refuted the cited allegations, saying the cited risks \"are not based on specific evidence and no due process has been organized or followed to provide justification.\"\nThe cybersecurity company stated that the real reasons behind this sudden ban are purely political.\n\"Kaspersky believes that the decision stems from the current geopolitical climate and was not supported by any technical assessment of the company's products, which the company has been continuously advocating for,\" stated Mai Al Akkad, Kaspersky's Corporate Communications Manager.\n\"The fact that the directive was issued without any warning or opportunity for engagement to address the Australian Government's concerns highlights its political nature.\"\nThis move by the Australian government follows similar actions in other Western countries that have also cited national security and espionage concerns.\nThe U.S. prohibited the use of Kaspersky products on government systems in 2017 and expanded the ban to cover all U.S. companies and consumers on September 29, 2024.\nThe German government advised companies in the country against using Kaspersky products soon after the invasion of Ukraine, while Canada banned the use of Kaspersky security products on the mobile devices of government employees in October 2023.\nBased on an analysis of 14M malicious actions, discover the top 10 MITRE ATT&CK techniques behind 93% of attacks and how to defend against them.\nUS healthcare org pays $11M settlement over alleged cybersecurity lapses\nSwiss critical sector faces new 24-hour cyberattack reporting rule\nNew Chirp tool uses audio tones to transfer data between devices\nTrump bans China-linked apps for collecting Americans\u2019 data\nOpen-source tool 'Rayhunter' helps users detect Stingray attacks"
file_text = open("../raw_text_test_data/99.txt", encoding="utf-8").read()

In [9]:
# Tokenize and predict
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
inputs = tokenizer(file_text, return_tensors="pt")
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    logits = model(**inputs).logits

predictions = torch.argmax(logits, dim=2)

# Convert token IDs back to readable tokens
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

# Match each token with its predicted label
for token, pred_id in zip(tokens, predictions[0]):
    label = model.config.id2label[pred_id.item()]
    print(f"{token}\t{label}")

[CLS]	O
The	O
ĠAustralian	B-LOC
Ġgovernment	O
Ġhas	O
Ġbanned	O
Ġall	O
ĠK	B-ORG
asp	B-ORG
ers	I-ORG
ky	I-ORG
ĠLab	I-ORG
Ġproducts	O
Ġand	O
Ġweb	O
Ġservices	O
Ġfrom	O
Ġits	O
Ġsystems	O
Ġand	O
Ġdevices	O
Ġfollowing	O
Ġan	O
Ġanalysis	O
Ġthat	O
Ġclaims	O
Ġthe	O
Ġcompany	O
Ġposes	O
Ġa	O
Ġsignificant	O
Ġsecurity	O
Ġrisk	O
Ġto	O
Ġthe	O
Ġcountry	O
.	O
Ġ"	O
After	O
Ġconsidering	O
Ġthreat	O
Ġand	O
Ġrisk	O
Ġanalysis	O
,	O
ĠI	O
Ġhave	O
Ġdetermined	O
Ġthat	O
Ġthe	O
Ġuse	O
Ġof	O
ĠK	B-ORG
asp	B-ORG
ers	I-ORG
ky	I-ORG
ĠLab	I-ORG
,	I-ORG
ĠInc	I-ORG
.	I-Software
Ġproducts	O
Ġand	O
Ġweb	O
Ġservices	O
Ġby	O
ĠAustralian	B-LOC
ĠGovernment	I-ORG
Ġentities	O
Ġposes	O
Ġan	O
Ġunacceptable	O
Ġsecurity	O
Ġrisk	O
Ġto	O
ĠAustralian	B-LOC
ĠGovernment	I-ORG
,	O
Ġnetworks	O
Ġand	O
Ġdata	O
,	O
Ġarising	O
Ġfrom	O
Ġthreats	O
Ġof	O
Ġforeign	O
Ġinterference	O
,	O
Ġesp	O
ionage	O
Ġand	O
Ġsabot	I-Event
age	O
,"	O
Ġjustified	O
Âł	O
Ste	B-PER
phan	B-PER
ie	I-PER
ĠFoster	I-PER
,	O
ĠSecretary	B-PER
Ġof	I-ORG
Ġthe	O
ĠDepartment	B-

In [11]:
# Run the model for Named Entity Recognition
predictions = ner_pipeline(file_text)

# Print results
for entity in predictions:
    print(entity)

{'entity_group': 'LOC', 'score': 0.9925492, 'word': ' Australian', 'start': 3, 'end': 14}
{'entity_group': 'ORG', 'score': 0.877729, 'word': ' Kaspersky Lab', 'start': 40, 'end': 54}
{'entity_group': 'ORG', 'score': 0.8132387, 'word': ' Kaspersky Lab, Inc.', 'start': 284, 'end': 304}
{'entity_group': 'LOC', 'score': 0.98369515, 'word': ' Australian', 'start': 333, 'end': 344}
{'entity_group': 'ORG', 'score': 0.7608618, 'word': ' Government', 'start': 344, 'end': 355}
{'entity_group': 'LOC', 'score': 0.9776384, 'word': ' Australian', 'start': 403, 'end': 414}
{'entity_group': 'ORG', 'score': 0.39863634, 'word': ' Government,', 'start': 414, 'end': 426}
{'entity_group': 'Event', 'score': 0.7024085, 'word': ' sabotage,"', 'start': 505, 'end': 516}
{'entity_group': 'PER', 'score': 0.99825937, 'word': ' Foster,', 'start': 536, 'end': 544}
{'entity_group': 'PER', 'score': 0.7029762, 'word': ' Secretary', 'start': 544, 'end': 554}
{'entity_group': 'ORG', 'score': 0.29678345, 'word': ' of', 's

