In [60]:
import jsonpickle
import numpy as np
import time
from sktime.classification.dictionary_based import MUSE, TemporalDictionaryEnsemble

In [61]:
# Input paths for this notebook. Both files are opened and decoded with
# jsonpickle in the loading cell that follows.
# Labels file: decoded into `labels`, which is later remapped to integer ids.
in_labels_file = 'out_labels.json'
# Glyphs file: decoded into `glyphs`; each glyph is iterated as a sequence of
# strokes whose values are read as interleaved pairs (even index / odd index).
in_glyphs_filename = 'out_normalized_glyphs.json'

In [62]:
# Read and decode both input files. Both handles are opened before either
# file is decoded, and both are closed when the outer block exits.
# NOTE(review): jsonpickle.decode can instantiate arbitrary Python objects;
# only run this on files you produced yourself or otherwise trust.
with open(in_glyphs_filename, 'r') as glyphs_file:
    with open(in_labels_file, 'r') as labels_file:
        glyphs = jsonpickle.decode(glyphs_file.read())
        labels = jsonpickle.decode(labels_file.read())

In [63]:
# Flatten every glyph into two parallel coordinate lists.
# Each stroke is read as interleaved values: even positions go to the first
# list, odd positions to the second. All strokes of a glyph are concatenated.
# `maxlen` ends up as the longest per-glyph sequence length seen.
X = []
maxlen = 0
for glyph in glyphs:
    xs, ys = [], []
    for stroke in glyph:
        # Step through the stroke two values at a time (one pair per step).
        for k in range(0, len(stroke), 2):
            xs.append(stroke[k])
            ys.append(stroke[k + 1])
    if len(xs) > maxlen:
        maxlen = len(xs)
    X.append([xs, ys])

In [49]:
# Pad both coordinate lists of every glyph to the common length `maxlen` by
# cyclically repeating the glyph's own leading values (in place).
# Re-running this cell is a no-op once every sequence has reached `maxlen`.
for i in range(len(X)):
    xs, ys = X[i]
    if not xs and maxlen > 0:
        # An empty sequence cannot grow by repeating itself: extending with an
        # empty slice would leave the length at 0 and loop forever.
        raise ValueError(
            f"glyph at index {i} has no points and cannot be padded to length {maxlen}"
        )
    while len(xs) < maxlen:
        # Append as many leading values as are still missing (at most one
        # full copy of the current sequence per pass).
        xs.extend(xs[:maxlen - len(xs)])
        ys.extend(ys[:maxlen - len(ys)])

In [50]:
# Encode labels as consecutive integer ids, assigned in sorted label order.
#   labels_mapping:     id    -> label
#   labels_mapping_rev: label -> id
labels_mapping = dict(enumerate(sorted(set(labels))))
labels_mapping_rev = {name: label_id for label_id, name in labels_mapping.items()}

# Replace every label with its integer id.
# NOTE: this rebinds `labels`; re-running the cell without reloading the data
# would rebuild the mappings from the ids instead of the original labels.
labels = [labels_mapping_rev[name] for name in labels]

In [51]:
# Convert the nested Python lists into NumPy arrays so that boolean-mask
# indexing (items[labels == id]) works in the split cell.
items, labels = np.array(X), np.array(labels)

In [52]:
# Per-class split: the first `idx` samples of every class go to the training
# set, the remaining samples of that class go to the test set.
idx = 100

train_items = []
train_labels = []
test_items = []
test_labels = []
for label_id in labels_mapping.keys():
    label_items = items[labels == label_id]
    train_part = label_items[:idx]
    test_part = label_items[idx:]
    train_items.extend(train_part)
    # Size the label lists from the actual slices: a class with fewer than
    # `idx` samples must not contribute `idx` training labels, otherwise
    # items and labels fall out of step.
    train_labels.extend([label_id] * len(train_part))
    test_items.extend(test_part)
    test_labels.extend([label_id] * len(test_part))
train_items = np.array(train_items)
train_labels = np.array(train_labels)
test_items = np.array(test_items)
test_labels = np.array(test_labels)

# Every sample must have exactly one label in each split.
assert len(train_items) == len(train_labels), "train items/labels length mismatch"
assert len(test_items) == len(test_labels), "test items/labels length mismatch"

In [56]:
# MUSE classifier from sktime's dictionary-based module, otherwise default
# hyper-parameters. The seed is fixed so that fitting, and therefore the
# reported score, is reproducible across Restart & Run All.
clf = MUSE(random_state=42)

In [57]:
# Display the training array's dimensions before fitting. Axis 1 holds the two
# coordinate lists built per glyph; the last axis is the padded length.
train_items.shape

(9700, 2, 392)

In [58]:
# Fit the MUSE classifier on the training split (integer-encoded labels).
clf.fit(train_items, train_labels)

In [59]:
# Evaluate on the held-out test split.
# NOTE(review): score() presumably returns mean accuracy (scikit-learn-style
# convention) — confirm against the sktime documentation.
clf.score(test_items, test_labels)

0.6932989690721649

In [None]:
# Estimate the average latency of predicting one sample at a time.
# Uses at most 100 test samples, and fewer if the test split is smaller.
n_timed = min(100, len(test_items))
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() can jump if the system clock is adjusted.
t = time.perf_counter()
for i in range(n_timed):
    # Slice (not index) so the classifier still receives a batch of size 1.
    clf.predict(test_items[i:i + 1])
# Average seconds per prediction; max() guards against dividing by zero when
# there are no test samples.
print((time.perf_counter() - t) / max(n_timed, 1))

0.18913450002670287


In [64]:
# Predict labels for the whole test split in one batch; the error-analysis
# cells below compare these predictions against `test_labels`.
preds = clf.predict(test_items)

In [65]:
# Per-label error tally: one zero-initialised counter for every label id.
wrong_preds = dict.fromkeys(labels_mapping.keys(), 0)

In [66]:
# Count misclassified test samples per true label.
# The tally is rebuilt from zero in this cell so that re-running it does not
# add to counts left over from a previous run.
wrong_preds = {l: 0 for l in labels_mapping.keys()}
for pred, true_label in zip(preds, test_labels):
    if pred != true_label:
        wrong_preds[true_label] += 1

In [67]:
# Confusion counts as nested dicts: pred_as[true_id][predicted_id] is the
# number of test samples with that true label and that predicted label.
# dict.fromkeys builds a fresh inner dict for every true label.
pred_as = {
    true_id: dict.fromkeys(labels_mapping.keys(), 0)
    for true_id in labels_mapping.keys()
}
for true_id, predicted_id in zip(test_labels, preds):
    pred_as[true_id][predicted_id] += 1

In [None]:
# Report the error count for every label that was misclassified at least once.
for label_id, n_wrong in wrong_preds.items():
    if n_wrong <= 0:
        continue
    print(f"{labels_mapping[label_id]}: {n_wrong}")

!: 6
": 2
$: 2
': 7
(: 7
): 10
,: 12
-: 6
.: 9
0: 3
1: 13
2: 9
3: 3
4: 7
5: 5
6: 7
7: 7
8: 7
9: 5
:: 3
;: 5
<: 7
>: 3
?: 1
@: 2
A: 4
Aacute: 8
B: 3
C: 14
D: 6
E: 10
Eacute: 3
F: 3
G: 6
H: 4
I: 17
Iacute: 10
J: 9
K: 11
L: 7
M: 5
N: 8
Ntilde: 7
O: 11
Oacute: 10
P: 11
Q: 6
R: 4
S: 12
T: 9
U: 16
Uacute: 8
Uuml: 6
V: 11
W: 3
X: 12
Y: 10
Z: 9
a: 1
aacute: 6
b: 9
c: 15
d: 2
e: 5
eacute: 1
euro: 5
f: 7
g: 7
h: 3
i: 8
iacute: 12
iexcl: 10
iquest: 4
j: 5
k: 15
l: 8
n: 7
ntilde: 3
o: 14
oacute: 11
ordf: 4
ordm: 3
p: 8
q: 3
r: 6
s: 16
t: 9
u: 9
uacute: 5
v: 18
w: 9
x: 8
y: 11
z: 14


In [69]:
# List the most frequent confusions: (true label, predicted label) pairs that
# differ and occur more than twice in the test split.
for true_id, row in pred_as.items():
    for guess_id, n_times in row.items():
        # Skip the diagonal (correct predictions) and rare confusions.
        if guess_id == true_id or n_times <= 2:
            continue
        print(f"{labels_mapping[true_id]} as {labels_mapping[guess_id]}: {n_times}")

! as ?: 3
' as ,: 5
) as (: 5
0 as a: 5
4 as H: 5
6 as G: 4
> as <: 3
A as a: 3
C as <: 3
C as c: 6
E as Eacute: 4
E as F: 4
F as Ntilde: 3
H as M: 4
K as k: 3
M as m: 5
Ntilde as ntilde: 4
O as 8: 3
O as o: 5
Oacute as oacute: 4
P as D: 4
P as p: 4
S as s: 4
Uuml as m: 3
Uuml as uuml: 5
V as W: 3
W as m: 3
W as w: 4
X as x: 10
c as C: 3
i as eacute: 3
iacute as Iacute: 3
iacute as eacute: 4
k as K: 7
l as f: 3
n as m: 3
n as u: 3
oacute as Eacute: 3
oacute as Oacute: 4
q as g: 3
s as S: 5
w as W: 5
w as m: 3
x as X: 5
y as g: 4
z as Z: 10
