# Results analysis

This notebook details the experiments run on the two chosen datasets (Basque and Japanese, both taken from Universal Dependencies) and the results obtained.

## Imports

In [4]:
from dataset_loader import Dataset
from pathlib import Path
from hmm import HiddenMarkovModel
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

## Load the datasets

In [5]:
# Basque (UD_Basque-BDT)
# The plain and lemmatized variants read the same three CoNLL-U files, so the
# split paths are declared once and unpacked into both Dataset constructors.
basque_splits = dict(
    train_path=Path('../data/UD_Basque-BDT/eu_bdt-ud-train.conllu'),
    dev_path=Path('../data/UD_Basque-BDT/eu_bdt-ud-dev.conllu'),
    test_path=Path('../data/UD_Basque-BDT/eu_bdt-ud-test.conllu'),
)

basque_dataset = Dataset(dataset_name='UD_Basque-BDT', **basque_splits)
basque_lemmatized_dataset = Dataset(
    dataset_name='UD_Basque-BDT', **basque_splits, lemmatized=True
)

#-------------------------
# Japanese (UD_Japanese-GSD)
# Same layout as above: one set of split paths, two Dataset variants.
japanese_splits = dict(
    train_path=Path('../data/UD_Japanese-GSD/ja_gsd-ud-train.conllu'),
    dev_path=Path('../data/UD_Japanese-GSD/ja_gsd-ud-dev.conllu'),
    test_path=Path('../data/UD_Japanese-GSD/ja_gsd-ud-test.conllu'),
)

japanese_dataset = Dataset(dataset_name='UD_Japanese-GSD', **japanese_splits)
japanese_lemmatized_dataset = Dataset(
    dataset_name='UD_Japanese-GSD', **japanese_splits, lemmatized=True
)

## Hidden Markov Model Training

In [6]:
# One HiddenMarkovModel is built per dataset variant. Construction order is
# the same as listing them one by one: Basque plain, Basque lemmatized,
# Japanese plain, Japanese lemmatized.
# NOTE(review): presumably the constructor fits the model on the dataset it
# receives (the section is titled "Training") — confirm in hmm.py.

# Basque models
basque_hmm, basque_lemmatized_hmm = [
    HiddenMarkovModel(corpus)
    for corpus in (basque_dataset, basque_lemmatized_dataset)
]

# Japanese models
japanese_hmm, japanese_lemmatized_hmm = [
    HiddenMarkovModel(corpus)
    for corpus in (japanese_dataset, japanese_lemmatized_dataset)
]

## Basque performance evaluation

### Basque predictions

In [None]:
# Each Basque model runs batch_predict on the test_data of the dataset it
# was built from; the plain model is evaluated first, then the lemmatized one.
basque_predictions, basque_lemmatized_predictions = [
    model.batch_predict(corpus.test_data)
    for model, corpus in (
        (basque_hmm, basque_dataset),
        (basque_lemmatized_hmm, basque_lemmatized_dataset),
    )
]