In [1]:
from scripts.model_utils import Utils
import torch
from transformers import RobertaForTokenClassification, RobertaTokenizerFast, TrainingArguments, Trainer
from scripts.Reader import obtain_dataset, obtain_label_list

print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device('cuda')
print("Current Device:", torch.cuda.current_device(), torch.cuda.get_device_name(torch.cuda.current_device()))

CUDA available: True
Current Device: 0 NVIDIA GeForce RTX 4070 Ti


In [5]:
# Load tokenizer and model
label_list, label2id, id2label = obtain_label_list("OzRock")
model_name = 'roberta-large'
tokenizer = RobertaTokenizerFast.from_pretrained(model_name, add_prefix_space=True)
model = RobertaForTokenClassification.from_pretrained(model_name, num_labels=len(label_list), label2id=label2id, id2label=id2label)

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
training_args = TrainingArguments(
    output_dir="./results/Geo-NER",
    logging_dir="./logs/Geo-NER",
    eval_strategy="steps",
    save_strategy="steps",
    logging_steps=100,
    num_train_epochs=1,
    save_total_limit=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=5e-5,
    load_best_model_at_end=True,
    metric_for_best_model="f1"
)

In [10]:
utils = Utils(tokenizer, label_list)
tokenized_datasets = utils.tokenize_datasets(obtain_dataset("OzRock", ["train", "eval"]))

Tokenizing datasets...


Map:   0%|          | 0/31942 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [6]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 31942
    })
    eval: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 2000
    })
})

In [7]:
geo_ner = Trainer(
    model=model,
    args=training_args,
    compute_metrics=utils.compute_metrics,
    data_collator=utils.data_collator,
    tokenizer=tokenizer,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["eval"],
)

  geo_ner = Trainer(


In [8]:
geo_ner.train()

Step,Training Loss,Validation Loss,Precision,Recall,F1
100,0.2693,0.234656,0.569968,0.694251,0.626001
200,0.1275,0.208862,0.659252,0.716221,0.686557
300,0.091,0.20784,0.72636,0.767851,0.746529
400,0.0694,0.208985,0.734278,0.75888,0.746376
500,0.0628,0.222484,0.690847,0.779385,0.73245
600,0.0523,0.248187,0.723077,0.757232,0.73976
700,0.0478,0.238044,0.757151,0.770597,0.763815
800,0.0476,0.231846,0.744933,0.760344,0.75256
900,0.0374,0.272573,0.750612,0.729586,0.73995
1000,0.0329,0.243237,0.749648,0.780117,0.764579


TrainOutput(global_step=1997, training_loss=0.05403430526353266, metrics={'train_runtime': 9468.3097, 'train_samples_per_second': 3.374, 'train_steps_per_second': 0.211, 'total_flos': 1.417626143339916e+16, 'train_loss': 0.05403430526353266, 'epoch': 1.0})

In [10]:
geo_ner.evaluate(tokenized_datasets["eval"])

{'eval_loss': 0.2432372272014618,
 'eval_precision': 0.7496481351161154,
 'eval_recall': 0.7801171731966313,
 'eval_f1': 0.7645792212452898,
 'eval_runtime': 38.8285,
 'eval_samples_per_second': 51.508,
 'eval_steps_per_second': 1.623,
 'epoch': 1.0}

In [11]:
geo_ner.save_model("./results/Geo-NER/final_model")

In [9]:
# Load the model and tokenizer from the folder
model = RobertaForTokenClassification.from_pretrained("./results/Geo-NER/final_model/")
tokenizer = RobertaTokenizerFast.from_pretrained("./results/Geo-NER/final_model/")

In [None]:
conf = [[0]*13]*13

encodings=tokenizer(list(tokenized_datasets['eval']['tokens']), padding=True, truncation=True, return_tensors="pt", is_split_into_words=True)

with torch.no_grad():
    outputs = model(**encodings)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)  


In [32]:
conf = [[0 for _ in range(13)] for _ in range(13)]
for preds, actua in zip(predictions,tokenized_datasets['eval']['ner_tags']):
    preds = preds[:len(actua)]
    for j, predicted_label in enumerate(preds):
        conf[actua[j]][predicted_label] += 1  

conf

[[56, 6, 3, 14, 4, 9, 55, 0, 3, 10, 13, 0, 830],
 [9, 191, 6, 34, 2, 0, 3, 3, 5, 19, 7, 0, 1034],
 [2, 14, 6, 5, 0, 4, 3, 0, 8, 1, 6, 0, 333],
 [2, 76, 1, 210, 16, 26, 9, 0, 5, 99, 27, 0, 1409],
 [7, 5, 1, 24, 17, 8, 7, 0, 1, 16, 51, 0, 503],
 [9, 1, 2, 6, 2, 6, 8, 0, 0, 3, 4, 0, 169],
 [120, 2, 1, 9, 10, 12, 39, 0, 2, 4, 10, 0, 475],
 [0, 28, 0, 2, 0, 0, 1, 0, 0, 3, 1, 0, 56],
 [4, 12, 75, 1, 0, 0, 1, 2, 8, 3, 2, 0, 193],
 [2, 38, 0, 139, 3, 6, 2, 0, 2, 48, 4, 0, 512],
 [29, 8, 0, 32, 158, 12, 8, 0, 1, 15, 70, 0, 447],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2],
 [753, 1586, 201, 1713, 346, 217, 427, 27, 197, 777, 732, 0, 36257]]

In [38]:
def metr(conf, index):
    TP = conf[index][index]
    FN = sum(conf[index])-TP
    FP = sum(uh[index] for uh in conf)-TP
    try:
        recall = TP/(TP+FN)
    except ZeroDivisionError:
        recall = 0
    try:
        prec = TP/(TP+FP)
    except ZeroDivisionError:
        prec=0
    try:
        f1 = (2*recall*prec)/(recall+prec)
    except ZeroDivisionError:
        f1=0
    return recall, prec, f1

for i in range(13):
    r,p,f = metr(conf,i)
    print(round(r,4),round(p,4),round(f,4))


0.0558 0.0564 0.0561
0.1455 0.0971 0.1165
0.0157 0.0203 0.0177
0.1117 0.0959 0.1032
0.0266 0.0305 0.0284
0.0286 0.0199 0.0235
0.057 0.0693 0.0626
0.0 0.0 0
0.0266 0.0345 0.03
0.0635 0.0481 0.0547
0.0897 0.0755 0.082
0.0 0 0
0.8386 0.8588 0.8486
