In [1]:
from utils_pos import get_word_tag, preprocess
import pandas as pd
import numpy as np
import math
from collections import defaultdict

In [2]:
# Loading trining dataset
with open("WSJ_02-21.pos", "r") as f:
    training_corpus = f.readlines()
    # training_corpus = f.read().split("\n")

print(training_corpus[:50])

['In\tIN\n', 'an\tDT\n', 'Oct.\tNNP\n', '19\tCD\n', 'review\tNN\n', 'of\tIN\n', '``\t``\n', 'The\tDT\n', 'Misanthrope\tNN\n', "''\t''\n", 'at\tIN\n', 'Chicago\tNNP\n', "'s\tPOS\n", 'Goodman\tNNP\n', 'Theatre\tNNP\n', '(\t(\n', '``\t``\n', 'Revitalized\tVBN\n', 'Classics\tNNS\n', 'Take\tVBP\n', 'the\tDT\n', 'Stage\tNN\n', 'in\tIN\n', 'Windy\tNNP\n', 'City\tNNP\n', ',\t,\n', "''\t''\n", 'Leisure\tNN\n', '&\tCC\n', 'Arts\tNNS\n', ')\t)\n', ',\t,\n', 'the\tDT\n', 'role\tNN\n', 'of\tIN\n', 'Celimene\tNNP\n', ',\t,\n', 'played\tVBN\n', 'by\tIN\n', 'Kim\tNNP\n', 'Cattrall\tNNP\n', ',\t,\n', 'was\tVBD\n', 'mistakenly\tRB\n', 'attributed\tVBN\n', 'to\tTO\n', 'Christina\tNNP\n', 'Haag\tNNP\n', '.\t.\n', '\n']


In [3]:
# loading Vocab
with open("hmm_vocab.txt", "r") as f:
    vocab_l = f.read().split("\n")

print(vocab_l[:50])
print()
print(vocab_l[-50:])

['!', '#', '$', '%', '&', "'", "''", "'40s", "'60s", "'70s", "'80s", "'86", "'90s", "'N", "'S", "'d", "'em", "'ll", "'m", "'n'", "'re", "'s", "'til", "'ve", '(', ')', ',', '-', '--', '--n--', '--unk--', '--unk_adj--', '--unk_adv--', '--unk_digit--', '--unk_noun--', '--unk_punct--', '--unk_upper--', '--unk_verb--', '.', '...', '0.01', '0.0108', '0.02', '0.03', '0.05', '0.1', '0.10', '0.12', '0.13', '0.15']

['yards', 'yardstick', 'year', 'year-ago', 'year-before', 'year-earlier', 'year-end', 'year-on-year', 'year-round', 'year-to-date', 'year-to-year', 'yearlong', 'yearly', 'years', 'yeast', 'yelled', 'yelling', 'yellow', 'yen', 'yes', 'yesterday', 'yet', 'yield', 'yielded', 'yielding', 'yields', 'you', 'young', 'younger', 'youngest', 'youngsters', 'your', 'yourself', 'youth', 'youthful', 'yuppie', 'yuppies', 'zero', 'zero-coupon', 'zeroing', 'zeros', 'zinc', 'zip', 'zombie', 'zone', 'zones', 'zoning', '{', '}', '']


In [4]:
# creating vocav dictionary {word: index}
vocab = {}

for i, word in enumerate(sorted(vocab_l)):
    vocab[word] = i

# print first 10 items of vocab
n = 10
for k, v in list(vocab.items())[:n]:
    print(f"{k}:{v}")

:0
!:1
#:2
$:3
%:4
&:5
':6
'':7
'40s:8
'60s:9


In [5]:
# Load test data
with open("WSJ_24.pos", "r") as f:
    y = f.readlines()

print(y[:50])
print()
print(y[-50:])

['The\tDT\n', 'economy\tNN\n', "'s\tPOS\n", 'temperature\tNN\n', 'will\tMD\n', 'be\tVB\n', 'taken\tVBN\n', 'from\tIN\n', 'several\tJJ\n', 'vantage\tNN\n', 'points\tNNS\n', 'this\tDT\n', 'week\tNN\n', ',\t,\n', 'with\tIN\n', 'readings\tNNS\n', 'on\tIN\n', 'trade\tNN\n', ',\t,\n', 'output\tNN\n', ',\t,\n', 'housing\tNN\n', 'and\tCC\n', 'inflation\tNN\n', '.\t.\n', '\n', 'The\tDT\n', 'most\tRBS\n', 'troublesome\tJJ\n', 'report\tNN\n', 'may\tMD\n', 'be\tVB\n', 'the\tDT\n', 'August\tNNP\n', 'merchandise\tNN\n', 'trade\tNN\n', 'deficit\tNN\n', 'due\tJJ\n', 'out\tIN\n', 'tomorrow\tNN\n', '.\t.\n', '\n', 'The\tDT\n', 'trade\tNN\n', 'gap\tNN\n', 'is\tVBZ\n', 'expected\tVBN\n', 'to\tTO\n', 'widen\tVB\n', 'to\tTO\n']

[',\t,\n', 'very\tRB\n', 'sensitive\tJJ\n', ',\t,\n', "''\t''\n", 'said\tVBD\n', 'Andrew\tNNP\n', 'Mlangeni\tNNP\n', ',\t,\n', 'another\tDT\n', 'early\tJJ\n', 'Umkhonto\tNNP\n', 'leader\tNN\n', 'who\tWP\n', 'is\tVBZ\n', 'now\tRB\n', '63\tCD\n', '.\t.\n', '\n', '``\t``\n', 'We\tPRP\n

In [6]:
# preprocess vocab
_, prep = preprocess(vocab, "test.words")

print(type(prep))
print()
print(f"Sample of preprossed vocab:\n{prep[:50]}")
print()
print(f"Length of preprocesed vocab {len(prep)}")

<class 'list'>

Sample of preprossed vocab:
['The', 'economy', "'s", 'temperature', 'will', 'be', 'taken', 'from', 'several', '--unk--', 'points', 'this', 'week', ',', 'with', 'readings', 'on', 'trade', ',', 'output', ',', 'housing', 'and', 'inflation', '.', '--n--', 'The', 'most', 'troublesome', 'report', 'may', 'be', 'the', 'August', 'merchandise', 'trade', 'deficit', 'due', 'out', 'tomorrow', '.', '--n--', 'The', 'trade', 'gap', 'is', 'expected', 'to', 'widen', 'to']

Length of preprocesed vocab 34199


### Bayes Theroem

In [7]:
# building 3 dictionaries for transition, emissin and tags counts
def build_dict_counts(corpus, vocab):
    transition_counts_dict = defaultdict(int)
    emission_counts_dict = defaultdict(int)
    tags_counts_dict = defaultdict(int)

    # start of a sentence tag
    prev_tag = "--s--"
    i = 0
    
    # l = [tuple(w.split("\t")) for w in corpus]
    # tags = [t for  _, t in l]
    # print(l)
    # print(tags)

    for word_tag in corpus:
        i += 1
        if i % 50000 == 0:
            print(f"We are at line {i}")
        # split word_tag into owrd and corsponding tag
        word, tag = get_word_tag(word_tag, vocab)
        # increment (t_minus_i, t_i) by 1
        transition_counts_dict[(prev_tag, tag)] += 1
        # increment (t, word) by 1
        emission_counts_dict[(tag, word)] += 1
        # increment tags by 1
        tags_counts_dict[tag] += 1
        # make prev_tag equals tag
        prev_tag = tag

    return transition_counts_dict, emission_counts_dict, tags_counts_dict

In [8]:
# l = [(w, t) for w]
n = 5
transition_counts, emission_counts, tags_counts = build_dict_counts(training_corpus, vocab)

for k,v in list(transition_counts.items())[:n]:
    print(f"Transition counts:\n{k}:{v}")
print()

for k,v in list(emission_counts.items())[:n]:
    print(f"Emission counts:\n{k}:{v}")
print()

for k,v in list(tags_counts.items())[:n]:
    print(f"Tag counts:\n{k}:{v}")
print()


We are at line 50000
We are at line 100000
We are at line 150000
We are at line 200000
We are at line 250000
We are at line 300000
We are at line 350000
We are at line 400000
We are at line 450000
We are at line 500000
We are at line 550000
We are at line 600000
We are at line 650000
We are at line 700000
We are at line 750000
We are at line 800000
We are at line 850000
We are at line 900000
We are at line 950000
Transition counts:
('--s--', 'IN'):5050
Transition counts:
('IN', 'DT'):32364
Transition counts:
('DT', 'NNP'):9044
Transition counts:
('NNP', 'CD'):1752
Transition counts:
('CD', 'NN'):7377

Emission counts:
('IN', 'In'):1735
Emission counts:
('DT', 'an'):3142
Emission counts:
('NNP', 'Oct.'):317
Emission counts:
('CD', '19'):100
Emission counts:
('NN', 'review'):36

Tag counts:
IN:98554
Tag counts:
DT:81842
Tag counts:
NNP:91466
Tag counts:
CD:36568
Tag counts:
NN:132935



In [9]:
# get list of POS tags
tags = sorted(tags_counts.keys())
print(f"# Tags: {len(tags)}")
print()
print(f"Tags: {tags}")

# Tags: 46

Tags: ['#', '$', "''", '(', ')', ',', '--s--', '.', ':', 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB', '``']


In [10]:
def predict_POS(prep, y, emission_counts, vocab, tags):
    correct = 0
    all_words = set(emission_counts.keys())
    total = len(y)

    for y_tup, word in zip(y, prep):
        y_tup_l = y_tup.split()
        
        if len(y_tup_l) == 2:
            pos_true = y_tup_l[1]
        else:
            continue
            
        count_final = 0
        pos_final = ""
        
        if word in vocab:
            for pos in tags:
                key = (pos, word)
                
                if key in emission_counts:
                    count = emission_counts[key]
                    
                    if count > count_final:
                        count_final = count
                        pos_final = pos
                        
        if pos_final == pos_true:
            correct += 1

    accuracy = correct / total
    
    return accuracy

In [11]:
i = 0
for y_tup, word in zip(y, prep):
    i += 1
    print(y_tup, word)
    if i == 5:
        break

The	DT
 The
economy	NN
 economy
's	POS
 's
temperature	NN
 temperature
will	MD
 will


In [12]:
acc = predict_POS(prep, y, emission_counts, vocab, tags)
print(f"Accuracy = {acc}")

Accuracy = 0.8888563993099213


### Hidden Markov Model (HMM)

In [13]:
def create_transition_matrix(states, transition_counts, tags_counts, alpha = 0.001):
    rows = len(states)
    cols = len(states)
    N = len(states)
    # initialize current and next states
    current_states = states
    next_states = states
    # initial count matrix
    transition_matrix = pd.DataFrame(0, index = current_states, columns = next_states)
    # print(tags_counts)
    # transition probability matrix
    for i in range(rows):
        for j in range(cols):
            curr = transition_matrix.index[i]
            next = transition_matrix.columns[j]
            transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
            
    return transition_matrix

In [14]:
transition_prob_matrix = create_transition_matrix(tags, transition_counts, tags_counts)
transition_prob_matrix

  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transition_matrix.iat[i, j] = (transition_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))
  transiti

Unnamed: 0,#,$,'',(,),",",--s--,.,:,CC,...,VBD,VBG,VBN,VBP,VBZ,WDT,WP,WP$,WRB,``
#,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,0.007047013,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,...,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06,7.039973e-06
$,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,...,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07,1.356476e-07
'',1.445286e-07,0.0001446731,0.006937517,0.006792989,0.005058645,0.001011845,0.3293808,0.002312602,0.008238274,0.03613229,...,0.08859617,0.008816389,0.005203174,0.002457131,0.12574,0.00867186,0.001879016,0.0002892017,0.003179774,0.002746188
(,7.320398e-07,0.1691019,7.320398e-07,7.320398e-07,7.320398e-07,7.320398e-07,7.320398e-07,7.320398e-07,7.320398e-07,0.02489008,...,0.001464812,0.00658909,0.01756969,0.0007327718,0.0007327718,0.00732113,0.004392971,7.320398e-07,0.002928891,0.0307464
),7.267199e-07,0.0007274466,7.267199e-07,0.0007274466,7.267199e-07,0.1635127,0.108282,0.1380775,0.07412616,0.05305128,...,0.03052296,0.004361046,0.007267926,0.02180232,0.04941768,0.004361046,0.002907606,7.267199e-07,7.267199e-07,0.0007274466
",",2.052248e-08,0.001559729,0.05738088,0.0003078578,2.052248e-08,2.052248e-08,0.0001231554,2.052248e-08,0.0001026329,0.09267956,...,0.05243497,0.04488269,0.02037885,0.008598941,0.03201509,0.03577071,0.01237508,0.002113836,0.008291104,0.01307284
--s--,2.513052e-05,0.0007029767,0.0004017117,0.003615205,2.513052e-05,2.510541e-08,2.510541e-08,2.510541e-08,0.002560777,0.05696421,...,0.0008285038,0.01210083,0.00577427,0.0003515009,0.001405928,0.0007782929,0.003087991,2.513052e-05,0.006251273,0.07539158
.,2.533053e-08,2.533053e-08,0.05932414,0.001342544,0.005395429,7.601693e-05,0.9299599,0.0001773391,0.0002026696,2.535586e-05,...,2.533053e-08,2.533053e-08,2.535586e-05,2.535586e-05,2.535586e-05,2.533053e-08,5.06864e-05,2.533053e-08,2.535586e-05,0.0004559749
:,0.000419317,0.02703264,0.001257532,0.0006288707,2.095537e-07,0.0006288707,0.06265677,0.01886004,0.001676639,0.06726695,...,0.01718361,0.0222129,0.01278299,0.01131611,0.01823138,0.01257343,0.005239053,0.0002097633,0.006496375,0.06642874
CC,0.000167077,0.0197102,4.17588e-08,0.000375871,4.180056e-05,0.007683662,4.17588e-08,4.17588e-08,0.0002923534,0.0002088358,...,0.03808407,0.02129703,0.01432331,0.01240241,0.0230509,0.001044012,0.002129741,0.0002505946,0.004134163,0.005929792


In [15]:
transition_prob_matrix.loc["SYM", "UH"]

1.7227715949419426e-05

In [16]:
transition_counts[("CD", "CC")] / tags_counts["CD"]

0.016653905053598775

In [None]:
# def create_emission_prob_matrix(states, observations, emission_counts, tags_counts, alpha = 0.001):
#     rows = len(states)
#     cols = len(observations)
#     N = len(vocab)
#     # initialize current and next states
#     current_states = states
#     next_states = observations
#     # matrix initialization
#     emission_prob_matrix = pd.DataFrame(0, index = current_states, columns = next_states)
#     # fill out cells
#     for i in range(rows):
#         for j in range(cols):
#             curr = emission_prob_matrix.index[i]
#             next = emission_prob_matrix.columns[j]
#             emission_prob_matrix.iat[i, j] = (emission_counts[(curr, next)] + alpha) / (tags_counts[curr] + (alpha * N))

#     return emission_prob_matrix

In [28]:
# More efficient implementation
def create_emission_matrix(tag_counts, emission_counts, vocab, alpha = 0.001):
    # get the number of POS tag and words
    num_tags = len(tag_counts)
    num_words = len(vocab)
    
    all_tags = sorted(tag_counts.keys())

    emission_prob_matrix = np.zeros((num_tags, num_words))
    emis_keys = set(list(emission_counts.keys()))

    for i in range(num_tags):
        for j in range(num_words):
            # Initialize the emission count for the (POS tag, word) to zero
            count = 0
                    
            # Define the (POS tag, word) tuple for this row and column
            key =  (all_tags[i], vocab[j])

            # check if the (POS tag, word) tuple exists as a key in emission counts
            if key in emis_keys: # complete this line
        
                # Get the count of (POS tag, word) from the emission_counts d
                count = emission_counts[key]
                
            # Get the count of the POS tag
            count_tag = tag_counts[key[0]]
                
            # Apply smoothing and store the smoothed value 
            emission_prob_matrix[i,j] = (count + alpha) / (count_tag + alpha * num_words)

    return emission_prob_matrix

In [29]:
# creating your emission probability matrix. this takes a few minutes to run. 
emission_prob_matrix = create_emission_matrix(tags_counts, emission_counts, list(vocab))

print(f"View Matrix position at row 0, column 0: {emission_prob_matrix[0,0]:.9f}")
print(f"View Matrix position at row 3, column 1: {emission_prob_matrix[3,1]:.9f}")

# Try viewing emissions for a few words in a sample dataframe
cidx  = ['725','adroitly','engineers', 'promoted', 'synergy']

# Get the integer ID for each word
cols = [vocab[a] for a in cidx]

# Choose POS tags to show in a sample dataframe
rvals =['CD','NN','NNS', 'VB','RB','RP']

# For each POS tag, get the row number from the 'states' list
rows = [tags.index(a) for a in rvals]

# Get the emissions for the sample of words, and the sample of POS tags
emission_prob_matrix_sub = pd.DataFrame(emission_prob_matrix[np.ix_(rows,cols)], index=rvals, columns = cidx )
print(emission_prob_matrix_sub)

View Matrix position at row 0, column 0: 0.000006032
View Matrix position at row 3, column 1: 0.000000720
              725      adroitly     engineers      promoted       synergy
CD   8.201296e-05  2.732854e-08  2.732854e-08  2.732854e-08  2.732854e-08
NN   7.521128e-09  7.521128e-09  7.521128e-09  7.521128e-09  2.257091e-05
NNS  1.670013e-08  1.670013e-08  4.676203e-04  1.670013e-08  1.670013e-08
VB   3.779036e-08  3.779036e-08  3.779036e-08  3.779036e-08  3.779036e-08
RB   3.226454e-08  6.456135e-05  3.226454e-08  3.226454e-08  3.226454e-08
RP   3.723317e-07  3.723317e-07  3.723317e-07  3.723317e-07  3.723317e-07


### Viterbi Algorithm

In [36]:
def init_viterbi(states, tags_counts, transition_prob_matrix, emission_prob_matrix, corpus, vocab):
    num_tags = len(tags)
    # initialize best_probs matrix with zeros
    best_probs = np.zeros((num_tags, len(corpus)))
    # initialize best_paths matrix with zeros
    best_paths = np.zeros((num_tags, len(corpus)), dtype = int)
    # define the start token
    s_token_idx = states.index("--s--")
    
    for i in range(len(states)):
        if transition_prob_matrix.iat[s_token_idx, i] == 0:
            best_probs[i, 0] = float("-inf")
        
        else:
            best_probs[i, 0] = math.log(transition_prob_matrix.iat[s_token_idx, i]) + math.log(emission_prob_matrix[i, vocab[corpus[0]]])
    
    return best_probs, best_paths

In [38]:
best_probs, best_paths = init_viterbi(tags, tags_counts, transition_prob_matrix, emission_prob_matrix, prep, vocab)

In [42]:
print(f"best_probs at index (0,0) is {best_probs[0, 0]}")
print(f"best_paths at index (2,3) is {best_paths[2, 3]}")

best_probs at index (0,0) is -22.60982633354825
best_paths at index (2,3) is 0


In [44]:
# Forward pass in viterbi algo.
def viterbi_forward(transition_prob_matrix, emission_prob_matrix, test_corpus, best_probs, best_paths, vocab):
    num_tags = best_probs.shape[0]

    for i in range(1, len(test_corpus)):
        if i % 5000 == 0:
            print(f"Processed {i} words...")
            
        for j in range(num_tags):
            best_prob_i = float("-inf")
            best_path_i = None
            
            for k in range(num_tags):
                prob = best_probs[k, i-1] + math.log(transition_prob_matrix.iat[k, j]) + math.log(emission_prob_matrix[j, vocab[test_corpus[i]]])
                if prob > best_prob_i:
                    best_prob_i = prob
                    best_path_i = k
                    
            best_probs[j, i] = best_prob_i
            best_paths[j, i] = best_path_i
            
    return best_probs, best_paths

In [46]:
best_probs, best_paths = viterbi_forward(transition_prob_matrix, emission_prob_matrix, prep, best_probs, best_paths, vocab)

Processed 5000 words...
Processed 10000 words...
Processed 15000 words...
Processed 20000 words...
Processed 25000 words...
Processed 30000 words...


In [51]:
print(f"best_probs at index[0, 1] is {best_probs[0, 1]}")
print(f"best_paths at index[0, 4] is {best_probs[0, 4]}")

best_probs at index[0, 1] is -24.78215632717346
best_paths at index[0, 4] is -49.560126133711904


In [48]:
def viterbi_backward(best_probs, best_paths, corpus, states):
    m = best_probs.shape[1]
    z = [None] * m
    num_tags = best_probs.shape[0]
    last_word_best_prob = float("-inf")

    pred = [None] * m
    
    for k in range(num_tags):
        if best_probs[k, m - 1] > last_word_best_prob:
            last_word_best_prob = best_probs[k, m - 1]
            z[m - 1] = k
    pred[m - 1] = states[z[m - 1]]
    
    for i in range(m - 1, -1, -1):
        pos_tag_word_i = z[i]
        z[i - 1] = best_paths[pos_tag_word_i, i]
        pred[i - 1] = states[z[i - 1]]

    return pred

In [52]:
pred = viterbi_backward(best_probs, best_paths, prep, tags)
m = len(pred)

print(f"The prediction for pred[-7:m - 1] is \n {prep[-7:m - 1]} \n {pred[-7:m-1]}")
print()
print(f"The prediction for pred[0:8] is \n {prep[0:7]} \n {pred[0:7]}")

The prediction for pred[-7:m - 1] is 
 ['see', 'them', 'here', 'with', 'us', '.'] 
 ['VB', 'PRP', 'RB', 'IN', 'PRP', '.']

The prediction for pred[0:8] is 
 ['The', 'economy', "'s", 'temperature', 'will', 'be', 'taken'] 
 ['DT', 'NN', 'POS', 'NN', 'MD', 'VB', 'VBN']


In [55]:
def compute_accuracy(pred, y):
    num_correct = 0
    total = 0
    
    for prediction, y in zip(pred, y):
        word_tag_tup = y.split("\t")
        
        if len(word_tag_tup) != 2:
            continue
            
        word, tag = word_tag_tup
        tag = tag.strip()
        
        if tag == prediction:
            num_correct += 1
            
        total += 1
    accuracy = num_correct / total
    
    return accuracy

In [56]:
acc = compute_accuracy(pred, y)
print(f"Accuracy of Viterbi Algorithm is {acc}")

Accuracy of Viterbi Algorithm is 0.953063647155511
