# Model inference 

> How can we check if the model is trained correctly and predict with it?

- title-block-banner: true

In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution, so edits to the library source are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Switch the working directory to the parent folder before importing
# `embeddings` (presumably the repository root, so the package and its
# relative paths resolve — confirm).
# NOTE: the path is relative, so re-running this cell moves up one more level.
os.chdir("..")

from embeddings.data.data_loader import HuggingFaceDataLoader
from embeddings.defaults import DATASET_PATH, RESULTS_PATH
from embeddings.embedding.auto_flair import AutoFlairWordEmbedding
from embeddings.evaluator.sequence_labeling_evaluator import SequenceLabelingEvaluator
from embeddings.model.flair_model import FlairModel
from embeddings.pipeline.standard_pipeline import StandardPipeline
from embeddings.task.flair_task.sequence_labeling import SequenceLabeling
from embeddings.transformation.flair_transformation.column_corpus_transformation import (
    ColumnCorpusTransformation,
)
from embeddings.data.dataset import Dataset

from embeddings.transformation.flair_transformation.downsample_corpus_transformation import (
    DownsampleFlairCorpusTransformation,
)
from embeddings.transformation.flair_transformation.split_sample_corpus_transformation import (
    SampleSplitsFlairCorpusTransformation,
)
from embeddings.utils.utils import build_output_path

### Run downsampled flair pipeline

In [None]:
# Identifiers of the word embedding (loaded via `from_hub` below) and of the
# dataset wrapped by `Dataset`.
embedding_name_or_path = "clarin-pl/word2vec-kgr10"
dataset_name = "clarin-pl/kpwr-ner"

# Output location derived from RESULTS_PATH and the embedding/dataset names;
# later cells list its contents and read the checkpoint from it.
output_path = build_output_path(RESULTS_PATH, embedding_name_or_path, dataset_name)

dataset = Dataset(dataset_name)
data_loader = HuggingFaceDataLoader()

# Assemble the transformation chain one step at a time:
#   1. column corpus built from the "tokens" and "ner" fields,
#   2. split sampling with dev_fraction=0.1 and a fixed seed,
#   3. heavy downsampling of every split to keep this demo run small.
transformation = ColumnCorpusTransformation("tokens", "ner")
transformation = transformation.then(
    SampleSplitsFlairCorpusTransformation(dev_fraction=0.1, seed=441)
)
transformation = transformation.then(
    DownsampleFlairCorpusTransformation(
        downsample_train=0.005,
        downsample_dev=0.01,
        downsample_test=0.01,
    )
)

# Sequence labeling task limited to a single epoch.
task = SequenceLabeling(
    output_path,
    hidden_size=256,
    task_train_kwargs=dict(max_epochs=1, mini_batch_size=64),
)

embedding = AutoFlairWordEmbedding.from_hub(embedding_name_or_path)
model = FlairModel(embedding, task)
evaluator = SequenceLabelingEvaluator()

# Wire all components together; nothing is executed until `pipeline.run()`
# is called in the next cell.
pipeline = StandardPipeline(
    dataset,
    data_loader,
    transformation,
    model,
    evaluator,
)

In [None]:
# Execute the pipeline assembled above. The result is bound to `_` so that the
# notebook does not echo the returned value as cell output.
_ = pipeline.run()

### Load model from checkpoint

In [None]:
# Shell escape: list the contents of `output_path` (IPython expands the Python
# variable) to see which files are available there, e.g. the checkpoint.
!ls $output_path

In [None]:
# Restore a SequenceLabeling task from the "final-model.pt" checkpoint located
# in `output_path` (presumably written by the training run above — confirm).
task_from_ckpt = SequenceLabeling.from_checkpoint(
    checkpoint_path=output_path / "final-model.pt",
    output_path=output_path,
)

### Predict for test data

In [None]:
# Load the dataset again and push it through the same transformation chain
# that the pipeline was configured with, then keep only the test split.
# NOTE(review): the chain contains sampling/downsampling steps; only the split
# sampling has an explicit seed here, so whether this reproduces exactly the
# test subset used inside `pipeline.run()` should be confirmed.
loaded_data = data_loader.load(dataset)
transformed_data = transformation.transform(loaded_data)
test_data = transformed_data.test

In [None]:
# Predict with the checkpoint-restored task; `predict` returns the predictions
# together with a loss value (the loss is not used further in this notebook).
y_pred, loss = task_from_ckpt.predict(test_data)
# Extract the reference labels from the same data, using the label type and
# label dictionary stored on the restored task.
y_true = task_from_ckpt.get_y(test_data, task_from_ckpt.y_type, task_from_ckpt.y_dictionary)

In [None]:
# Score the predictions against the reference labels with the evaluator that
# was configured for the pipeline; as the last expression of the cell, the
# result is displayed by the notebook.
evaluator.evaluate({"y_pred": y_pred, "y_true": y_true})