<a href="https://colab.research.google.com/github/DmitriyValetov/nlp_course_project/blob/master/metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np

# <pad>  - 0
# <unk>  - 1 
# <sos>  - 2 
# <eos>  - 3
# word A - 4
# word B - 5

# batch targets (batch size, batch length)
bts = np.array([
[2, 1, 4, 3, 0],  # target 1
[2, 1, 4, 5, 3],  # target 2
[2, 4, 3, 0, 0]   # target 3
])
# batch predictions (batch size, batch length)
bps = np.array([
[2, 1, 5, 1, 0],  # prediction 1
[2, 1, 5, 4, 3],  # prediction 2
[2, 4, 3, 0, 0]   # prediction 3
])

In [4]:
# set (unique) metrics (without order)
for p, t in zip(bts, bps):
  t_set = set(t)  # unique labels
  t_set -= {0, 1, 2, 3}  # remove special labels
  eos = p[p == 3][0] if 3 in p else len(p)  # find first <eos> in prediction
  p_eos = p[:eos]  # cut prediction to first <eos>
  p_set = set(p_eos)  # unique labels
  p_set -= {0, 1, 2, 3}  # remove special labels
  i_set = t_set.intersection(p_set)  # shared labels
  pre = len(i_set)/len(p_set) if len(p_set) != 0 else 0.  # precision
  rec = len(i_set)/len(t_set) if len(t_set) != 0 else 0.  # recalll
  f1 = 2*pre*rec/(pre + rec) if pre + rec != 0 else 0.  # f1
  relen = len(p_eos)/len(t)
  print(f'target: {t}, prediction: {p}, cut prediction: {p_eos}')
  print(f'target set: {t_set}, prediction set: {p_set}')
  print(f'precision: {pre}, recall: {rec}, f1: {f1}')
  print(f'relative length: {relen}\n')


target: [2 1 5 1 0], prediction: [2 1 4 3 0], cut prediction: [2 1 4]
target set: {5}, prediction set: {4}
precision: 0.0, recall: 0.0, f1: 0.0
relative length: 0.6

target: [2 1 5 4 3], prediction: [2 1 4 5 3], cut prediction: [2 1 4]
target set: {4, 5}, prediction set: {4}
precision: 1.0, recall: 0.5, f1: 0.6666666666666666
relative length: 0.6

target: [2 4 3 0 0], prediction: [2 4 3 0 0], cut prediction: [2 4 3]
target set: {4}, prediction set: {4}
precision: 1.0, recall: 1.0, f1: 1.0
relative length: 0.6



In [5]:
# accuracy (ordered)
pad_mask = bts != 0  # mask <pad> at target
m_bts = bts[pad_mask]  # mask targets
m_bps = bps[pad_mask]  # mask predictions like targets
comp = m_bts == m_bps  # compare
acc = np.sum(comp) / len(m_bts)
print(f'targets:\n{bts}')
print(f'predictions:\n{bps}')
print(f'pad_mask:\n{pad_mask}')
print(f'masked targets:\n{m_bts}')
print(f'masked predictions:\n{m_bps}')
print(f'comparison\n{comp}')
print(f'accuracy: {acc}')

targets:
[[2 1 4 3 0]
 [2 1 4 5 3]
 [2 4 3 0 0]]
predictions:
[[2 1 5 1 0]
 [2 1 5 4 3]
 [2 4 3 0 0]]
pad_mask:
[[ True  True  True  True False]
 [ True  True  True  True  True]
 [ True  True  True False False]]
masked targets:
[2 1 4 3 2 1 4 5 3 2 4 3]
masked predictions:
[2 1 5 1 2 1 5 4 3 2 4 3]
comparison
[ True  True False False  True  True False False  True  True  True  True]
accuracy: 0.6666666666666666
