In [ ]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from torch.utils.data import DataLoader, IterableDataset

In [None]:
# Custom IterableDataset to handle tokenized examples
class TokenizedExamplesDataset(IterableDataset):
    """Iterable dataset serving an in-memory collection of tokenized examples."""

    def __init__(self, examples):
        """Keep a reference to ``examples``; the collection is not copied."""
        self.examples = examples

    def __len__(self):
        """Return how many examples the wrapped collection holds."""
        return len(self.examples)

    def __iter__(self):
        """Yield the stored examples in the order the collection iterates them."""
        yield from self.examples

# Model class for handling inference operations
class InferenceModel:
    """Token-classification inference wrapper around a pretrained model.

    Loads a tokenizer and an ``AutoModelForTokenClassification`` from
    ``model_path`` and exposes :meth:`predict`, which returns one array of
    predicted label ids per input text.

    NOTE(review): ``stride`` and ``use_sliding_window`` are stored on the
    instance but nothing in this class consumes them; texts are fed to the
    model as a single sequence each, without truncation or windowing.
    """

    def __init__(self, model_path, max_seq_len=256, stride=0.8, use_sliding_window=True, batch_size=1, device="cpu"):
        """Load tokenizer and model and record the inference settings.

        Args:
            model_path: Name or directory passed to ``from_pretrained``.
            max_seq_len: Window length used when computing window counts.
            stride: Stored only (see class note).
            use_sliding_window: Stored only (see class note).
            batch_size: Number of texts per forward pass.
            device: Device the model and input batches are moved to.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForTokenClassification.from_pretrained(model_path).to(device)
        self.model.eval()

        self.use_sliding_window = use_sliding_window
        self.stride = stride
        self.max_seq_len = max_seq_len
        self.batch_size = batch_size
        self.device = device
        # predict() uses CUDA autocast, so mixed precision is only meaningful
        # on CUDA devices. Comparing against the string "cpu" would wrongly
        # enable it for torch.device("cpu") objects or other backends.
        self.fp16 = torch.device(device).type == "cuda"

        if self.fp16:
            self._check_fp16_support()

    def _check_fp16_support(self):
        """Turn fp16 off when ``torch.cuda.amp`` cannot be imported."""
        try:
            from torch.cuda import amp  # noqa: F401 - import is the check itself
        except ImportError as e:
            print("FP16 not supported: ", e)
            self.fp16 = False

    def prepare_examples(self, texts):
        """Tokenize ``texts`` into a dataset plus per-text bookkeeping.

        Args:
            texts: Iterable of input strings.

        Returns:
            A 3-tuple ``(dataset, window_counts, token_mappings)``:
            ``dataset`` is a ``TokenizedExamplesDataset`` of model-input dicts,
            ``window_counts[i]`` is ``n_tokens // max_seq_len + 1`` for text
            ``i``, and ``token_mappings[i]`` is the word-id list for text
            ``i`` (``None`` when the tokenizer cannot provide word ids).
        """
        tokenized_examples, window_counts, token_mappings = [], [], []
        for text in texts:
            tokens, word_ids = self._tokenize_text(text)
            tokenized_examples.append(tokens)
            # Count real tokens: len() of a (1, seq_len) tensor would always
            # be 1 and make every window count identical.
            n_tokens = tokens["input_ids"].shape[-1]
            # NOTE(review): formula kept from the original; it yields 2 for a
            # text of exactly max_seq_len tokens - confirm the intended count.
            window_counts.append(n_tokens // self.max_seq_len + 1)
            token_mappings.append(word_ids)
        return TokenizedExamplesDataset(tokenized_examples), window_counts, token_mappings

    def _tokenize_text(self, text):
        """Tokenize one text.

        Returns:
            ``(tokens, word_ids)`` where ``tokens`` is a dict with 1-D
            ``input_ids`` and ``attention_mask`` tensors, and ``word_ids`` is
            the tokenizer's token-to-word mapping, or ``None`` if the
            tokenizer is not a fast tokenizer.
        """
        encoding = self.tokenizer(text, add_special_tokens=True, return_tensors='pt')
        # Drop the leading batch dimension so examples can be padded and
        # stacked by _collate_batch.
        tokens = {
            "input_ids": encoding["input_ids"][0],
            "attention_mask": encoding["attention_mask"][0],
        }
        # word_ids() is only available on fast tokenizers.
        if getattr(self.tokenizer, "is_fast", False):
            word_ids = encoding.word_ids(0)
        else:
            word_ids = None
        return tokens, word_ids

    def _collate_batch(self, examples):
        """Right-pad a list of example dicts into one batch of 2-D tensors."""
        pad_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_id is None:
            # Padded positions are masked out, so the fill value is arbitrary.
            pad_id = 0
        pad = torch.nn.utils.rnn.pad_sequence
        return {
            "input_ids": pad(
                [example["input_ids"] for example in examples],
                batch_first=True,
                padding_value=pad_id,
            ),
            "attention_mask": pad(
                [example["attention_mask"] for example in examples],
                batch_first=True,
                padding_value=0,
            ),
        }

    def _predict_batch(self, batch):
        """Run the model on one collated batch; return label ids per example.

        Each returned NumPy array is trimmed to the example's unpadded length.
        """
        inputs = {name: tensor.to(self.device) for name, tensor in batch.items()}
        with torch.no_grad(), torch.cuda.amp.autocast(enabled=self.fp16):
            outputs = self.model(**inputs)
        logits = outputs[0]
        label_ids = torch.argmax(logits, dim=-1).cpu()
        # Padding is on the right, so the mask sum is the real length.
        lengths = batch["attention_mask"].sum(dim=1).tolist()
        return [row[:length].numpy() for row, length in zip(label_ids, lengths)]

    def predict(self, texts):
        """Predict label ids for every token of every text.

        Args:
            texts: Iterable of input strings.

        Returns:
            ``(predictions, window_counts, token_mappings)``; ``predictions``
            holds one 1-D NumPy array of label ids per text, in input order.
            The other two items are those returned by :meth:`prepare_examples`.
        """
        dataset, window_counts, token_mappings = self.prepare_examples(texts)
        predictions = []
        loader = DataLoader(dataset, batch_size=self.batch_size, collate_fn=self._collate_batch)
        for batch in loader:
            predictions.extend(self._predict_batch(batch))
        return predictions, window_counts, token_mappings

In [ ]:
# Configuration variables
model_path = "path_to_model_directory"
input_file = "path_to_input_file.tsv"
output_file = "path_to_output_file.tsv"

# Instantiate model
model = InferenceModel(model_path)

# Reading and processing input: the text is the second tab-separated column.
# Only the line terminator is removed before splitting, so a row whose first
# column is empty keeps its column positions.
texts = []
with open(input_file, 'r', encoding='utf-8') as file:
    for line_number, line in enumerate(file, start=1):
        if not line.strip():
            continue  # skip blank lines
        columns = line.rstrip('\r\n').split('\t')
        if len(columns) < 2:
            raise ValueError(
                f'{input_file}:{line_number}: expected at least 2 '
                f'tab-separated columns, got {len(columns)}'
            )
        texts.append(columns[1])

# Predicting
predictions, _, _ = model.predict(texts)

# Writing predictions to output file, one line of space-separated label ids
# per prediction. Formatting the array object directly would wrap long
# arrays over several lines and break the one-prediction-per-line layout.
with open(output_file, 'w', encoding='utf-8') as file:
    for pred in predictions:
        file.write(' '.join(str(int(label)) for label in pred) + '\n')