In [None]:
import pandas as pd
import sklearn
import os
from simpletransformers.ner import NERArgs, NERModel
import torch

In [ ]:
# Run identifier; it becomes the suffix of the output directory name.
number = 1

# Input files. Despite the `_dir` suffix, both values are file paths.
train_data_dir = "train.conll"
eval_data_dir = "train_a.conll"

# Tag set handed to the NER model.
labels = [
    "B",
    ":",
    ";",
    ",",
    ".",
    "-",
    "...",
    "?",
    "!",
]

# Architecture and checkpoint name used to build the model.
model_type = "bert"
model_name = "dkleczek/bert-base-polish-cased-v1"

# NOTE(review): model_name contains "/", so this value carries a path
# separator and resolves to a nested directory -- confirm that is intended.
output_dir = "model_dir_{}_{}".format(model_name, number)

In [ ]:
# Load the training and evaluation tables with identical parsing options:
# tab-separated, no header row (pandas assigns integer column labels).
# NOTE(review): simpletransformers documents DataFrame input with columns
# named "sentence_id", "words" and "labels" -- confirm these files and the
# header=None setting produce what the model expects.
train_data, evaluate_data = (
    pd.read_csv(table_path, sep="\t", header=None)
    for table_path in (train_data_dir, eval_data_dir)
)

In [ ]:
# Start from the library defaults and override only the options listed below.
model_args = NERArgs()

# NOTE(review): the early_stopping_* values presumably only take effect when
# early stopping and evaluation during training are switched on; neither
# switch is set in this cell -- confirm where they get enabled.
_arg_overrides = {
    "model_type": model_type,
    "model_name": model_name,
    "early_stopping_metric": "f1_weighted",
    "early_stopping_metric_minimize": False,
    "early_stopping_patience": 10,
    "train_batch_size": 12,
    "output_dir": output_dir,
    "best_model_dir": os.path.join(output_dir, "best_model"),
}
for _option, _setting in _arg_overrides.items():
    setattr(model_args, _option, _setting)

In [ ]:
# Run on the GPU only when torch reports that CUDA is usable.
cuda_available = torch.cuda.is_available()

# Build the token-classification model from the configured checkpoint,
# tag set and training arguments.
model = NERModel(
    model_type=model_type,
    model_name=model_name,
    labels=labels,
    args=model_args,
    use_cuda=cuda_available,
)

In [ ]:
def f1_weighted(true_labels, predicted_labels):
    """Return the support-weighted F1 score computed over all tokens.

    simpletransformers passes extra metrics one list of labels per sentence,
    whereas scikit-learn expects flat label sequences, so both inputs are
    flattened before scoring.

    Args:
        true_labels: Gold labels, one list per sentence.
        predicted_labels: Predicted labels, aligned with ``true_labels``.

    Returns:
        The weighted-average F1 score as a float.
    """
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.metrics` has been loaded.
    from sklearn.metrics import f1_score

    flat_true = [label for sentence in true_labels for label in sentence]
    flat_pred = [label for sentence in predicted_labels for label in sentence]
    return f1_score(flat_true, flat_pred, average="weighted")


# Train the model
model.train_model(
    train_data,
    output_dir=output_dir,
    # Both switches default to off; without them the early_stopping_* and
    # best_model_dir settings on the model args are never used.
    args={"evaluate_during_training": True, "use_early_stopping": True},
    # The parameter is `eval_data`; any other keyword is treated as an extra
    # metric callable.
    eval_data=evaluate_data,
    # The keyword name becomes the key in the results dict, so it has to
    # match the configured early_stopping_metric ("f1_weighted").
    f1_weighted=f1_weighted,
)

# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(eval_data_dir)
print("Evaluation Results:", result)
with open("evaluation_result.txt", 'w', encoding='utf-8') as file:
    file.writelines(f'{key}: {value}\n' for key, value in result.items())

print("Results saved successfully!")

In [ ]:
# Collect the second tab-separated column of every non-blank input line.
input_path = '/data/test-D/in.tsv'
with open(input_path, 'r', encoding='utf-8') as file:
    to_predict = [line.strip().split('\t')[1] for line in file if line.strip()]

predictions, model_outputs_pred = model.predict(to_predict, split_on_space=True)

output_path = 'predictions_output.txt'

# Write one "Text / Predictions" section per input sentence.
with open(output_path, 'w', encoding='utf-8') as file:
    for text, prediction in zip(to_predict, predictions):
        file.write(f"Text: {text}\n")
        file.write("Predictions:\n")
        # Each prediction is a list of single-entry {word: label} dicts.
        # Unpack the dicts instead of pairing them with text.split(), so the
        # bare label is written and words stay aligned even when the model
        # returns fewer entries than the text has words.
        for tagged_word in prediction:
            for token, label in tagged_word.items():
                file.write(f"{token}: {label}\n")
        file.write("\n")

print("Predictions saved successfully!")