In [1]:
import nltk
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np
from collections import defaultdict, Counter


In [2]:

# resources are downloaded
nltk.download('conll2002')

# Load the CoNLL-2002 dataset (Spanish NER)
train_data = list(nltk.corpus.conll2002.iob_sents('esp.train'))
test_data = list(nltk.corpus.conll2002.iob_sents('esp.testb'))

# Extract states (entity tags) and observations (words)
def get_tags(data):
    tags = set()
    for sentence in data:
        for word, pos, tag in sentence:
            tags.add(tag)
    return list(tags)

def get_words(data):
    words = set()
    for sentence in data:
        for word, pos, tag in sentence:
            words.add(word.lower())
    return list(words)

tags = get_tags(train_data)
words = get_words(train_data)

# Initialize probabilities
transition_counts = defaultdict(Counter)
emission_counts = defaultdict(Counter)
start_counts = Counter()

# Calculate counts for transitions, emissions, and start states
for sentence in train_data:
    prev_tag = '<s>'
    for word, pos, tag in sentence:
        start_counts[tag] += (prev_tag == '<s>')
        transition_counts[prev_tag][tag] += 1
        emission_counts[tag][word.lower()] += 1
        prev_tag = tag
    transition_counts[prev_tag]['</s>'] += 1

# Convert counts to probabilities
def normalize(counter):
    total = sum(counter.values())
    return {key: value / total for key, value in counter.items()}

start_probs = normalize(start_counts)
transition_probs = {prev_tag: normalize(tags_counts) for prev_tag, tags_counts in transition_counts.items()}
emission_probs = {tag: normalize(words_counts) for tag, words_counts in emission_counts.items()}

# Define Viterbi algorithm for decoding
def viterbi(sequence, states, start_prob, transition_prob, emission_prob):
    V = [{}]
    path = {}

    for state in states:
        V[0][state] = start_prob.get(state, 0) * emission_prob.get(state, {}).get(sequence[0], 1e-6)
        path[state] = [state]

    for t in range(1, len(sequence)):
        V.append({})
        newpath = {}

        for state in states:
            (prob, best_prev_state) = max(
                (V[t - 1][prev_state] * transition_prob.get(prev_state, {}).get(state, 1e-6) *
                 emission_prob.get(state, {}).get(sequence[t], 1e-6), prev_state)
                for prev_state in states
            )

            V[t][state] = prob
            newpath[state] = path[best_prev_state] + [state]

        path = newpath

    (prob, best_final_state) = max((V[-1][state], state) for state in states)
    return path[best_final_state]

# Evaluate the HMM model on the test dataset
def evaluate_model(test_data, states, transition_probs, emission_probs):
    y_true = []
    y_pred = []
    for sentence in test_data:
        words = [word.lower() for word, pos, tag in sentence]
        true_tags = [tag for word, pos, tag in sentence]
        predicted_tags = viterbi(words, states, start_probs, transition_probs, emission_probs)
        y_true.extend(true_tags)
        y_pred.extend(predicted_tags)
    return y_true, y_pred

# Get true and predicted tags
y_true, y_pred = evaluate_model(test_data, tags, transition_probs, emission_probs)

# Report accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy:.4f}')

# Generate and display confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred, labels=tags)
print("Confusion Matrix:")
print(conf_matrix)

# Display classification report
print(classification_report(y_true, y_pred, labels=tags))


[nltk_data] Downloading package conll2002 to
[nltk_data]     C:\Users\22anj\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\conll2002.zip.


Accuracy: 0.9395
Confusion Matrix:
[[  735    72    12     8     0     9     3   215    30]
 [   75  1014     3     0     1     2    11   275    19]
 [   18     5   484     0     4     0     1   222     1]
 [   19     6     4   143     3     6     1   128    15]
 [    7     2    12     0   463     0     1   144     5]
 [   11     7     4     8     6   161     3   327    30]
 [    5    14     2     0     0     9   100   204     5]
 [   36   147    38    15    26   131    75 44708   179]
 [   41    17     1    12     1    35    11   381   605]]
              precision    recall  f1-score   support

       B-LOC       0.78      0.68      0.72      1084
       B-ORG       0.79      0.72      0.76      1400
       B-PER       0.86      0.66      0.75       735
       I-LOC       0.77      0.44      0.56       325
       I-PER       0.92      0.73      0.81       634
      I-MISC       0.46      0.29      0.35       557
      B-MISC       0.49      0.29      0.37       339
           O      