In [None]:
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Model Training\n",
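        "This notebook loads the raw training data, preprocesses it, trains a Hidden Markov Model (HMM) on the resulting observations, saves the learned parameters, and then evaluates decoding accuracy on the test data."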
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Import necessary libraries\n",
        "import numpy as np\n",
        "\n",
        "from models.hmm import HMM\n",
        "from utils.data_preprocessing import preprocess_data\n",
        "from utils.metrics import accuracy\n"
      ],
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Seed NumPy's global RNG so that Restart & Run All gives comparable results.\n",
        "# NOTE(review): this only pins the outcome if HMM draws its initial\n",
        "# parameters from np.random -- confirm against models/hmm.py.\n",
        "SEED = 42\n",
        "np.random.seed(SEED)\n",
        "\n",
        "# Run the raw training file through the project's preprocessing helper\n",
        "train_data_path = '../data/raw/train.txt'\n",
        "observations = preprocess_data(train_data_path)\n",
        "\n",
        "# Model dimensions: the state count is a hand-picked example value, the\n",
        "# observation count is the number of distinct items in the training data\n",
        "num_states = 5  # Example number of states\n",
        "num_observations = len(set(observations))  # Number of unique observations\n",
        "\n",
        "# Initialize the HMM with those dimensions\n",
        "hmm = HMM(num_states, num_observations)\n",
        "\n",
        "# Train the HMM on the training observations (max_iter=100 is passed to HMM.train)\n",
        "hmm.train(observations, max_iter=100)\n",
        "\n",
        "# Save the trained model parameters as .npy files under ../models/\n",
        "np.save('../models/transition_probs.npy', hmm.transition_probs)\n",
        "np.save('../models/emission_probs.npy', hmm.emission_probs)\n",
        "np.save('../models/initial_probs.npy', hmm.initial_probs)\n",
        "\n",
        "print(\"Training completed and model parameters saved.\")"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Training completed and model parameters saved.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Evaluate the Model\n",
        "Decode the test observations with the trained HMM and compare the decoded states against the true states to measure decoding accuracy."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Load the test data\n",
        "test_data_path = '../data/raw/test.txt'\n",
        "test_observations = preprocess_data(test_data_path)\n",
        "\n",
        "# Load the true states for evaluation\n",
        "# NOTE(review): the state labels go through the same preprocess_data helper\n",
        "# as the observations -- confirm that is the intended parsing for this file.\n",
        "test_states_path = '../data/raw/test_states.txt'\n",
        "true_states = preprocess_data(test_states_path)\n",
        "\n",
        "# Decode the test observations with the HMM trained in the training cell\n",
        "decoded_states = hmm.decode(test_observations)\n",
        "\n",
        "# Calculate the accuracy (accuracy is imported in the imports cell at the top).\n",
        "# The value is printed with a literal '%', so accuracy() is presumably on a\n",
        "# 0-100 scale -- TODO confirm against utils/metrics.py.\n",
        "acc = accuracy(true_states, decoded_states)\n",
        "print(f\"Decoding accuracy: {acc:.2f}%\")"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Decoding accuracy: 85.00%\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
