In [None]:
import jsonpickle
import numpy as np
import time
from sktime.classification.dictionary_based import MUSE

In [None]:
# Input files, both read in the loading cell below.
# Glyph stroke data.
in_glyphs_filename = 'out_normalized_glyphs.json'
# Class label for every glyph.
in_labels_file = 'out_labels.json'

In [None]:
# Load the glyphs and their labels from the two configured JSON files.
#
# NOTE(security): jsonpickle.decode can instantiate arbitrary Python objects
# described by the payload, so only run this on files you produced yourself or
# otherwise trust.
#
# The encoding is passed explicitly so the result does not depend on the
# platform's default text encoding.
with open(in_glyphs_filename, 'r', encoding='utf-8') as glyphs_file, \
        open(in_labels_file, 'r', encoding='utf-8') as labels_file:
    glyphs = jsonpickle.decode(glyphs_file.read())
    labels = jsonpickle.decode(labels_file.read())

In [None]:
# Flatten every glyph into two parallel coordinate sequences.
# Each stroke is read as interleaved values [x0, y0, x1, y1, ...]; the strokes of
# a glyph are concatenated in order, so every entry of X is [all_x, all_y].
X = []
maxlen = 0  # longest per-glyph point count seen so far
for glyph in glyphs:
    points = [
        (stroke[pos], stroke[pos + 1])
        for stroke in glyph
        for pos in range(0, len(stroke), 2)
    ]
    xs = [px for px, _ in points]
    ys = [py for _, py in points]
    if len(xs) > maxlen:
        maxlen = len(xs)
    X.append([xs, ys])

In [None]:
# Right-pad every glyph to `maxlen` points by repeating its last x and last y
# value, so that all sequences end up with the same length.
for xs, ys in X:
    shortfall = maxlen - len(xs)
    if shortfall > 0:
        xs += [xs[-1]] * shortfall
        ys += [ys[-1]] * (maxlen - len(ys))

In [None]:
# Encode the labels as integers: ids are assigned in sorted order of the
# distinct label values.
#   labels_mapping:     id    -> label
#   labels_mapping_rev: label -> id
labels_mapping = dict(enumerate(sorted(set(labels))))
labels_mapping_rev = {name: code for code, name in labels_mapping.items()}

# From here on `labels` holds integer ids instead of the original label values.
labels = [labels_mapping_rev[name] for name in labels]

In [None]:
# Convert the padded sequences and the integer label ids to numpy arrays so they
# can be selected with boolean masks in the split below.
items, labels = np.array(X), np.array(labels)

In [None]:
# Per-class train/test split: for every class, the first `idx` samples (in file
# order) go to the training set and the remaining ones to the test set.
idx = 100  # maximum number of training samples taken from each class

train_items = []
train_labels = []
test_items = []
test_labels = []
for label_id in labels_mapping.keys():
    label_items = items[labels == label_id]
    train_part = label_items[:idx]
    test_part = label_items[idx:]
    train_items.extend(train_part)
    test_items.extend(test_part)
    # Emit exactly one label per sample actually taken. A class with fewer than
    # `idx` samples contributes fewer than `idx` training items, so a fixed
    # count of `idx` labels would leave items and labels misaligned.
    train_labels.extend([label_id] * len(train_part))
    test_labels.extend([label_id] * len(test_part))
train_items = np.array(train_items)
train_labels = np.array(train_labels)
test_items = np.array(test_items)
test_labels = np.array(test_labels)

In [None]:
# sktime's dictionary-based MUSE classifier, constructed with its default
# hyperparameters.
# NOTE(review): no random_state is passed; if MUSE has stochastic components the
# score recorded below may not reproduce exactly - confirm.
clf = MUSE()

In [None]:
# Sanity check of the training array: (n_train_samples, 2, maxlen), where axis 1
# holds the x and the y coordinate sequence of each glyph.
train_items.shape

(9700, 2, 392)

In [None]:
# Fit the classifier on the per-class training split.
clf.fit(train_items, train_labels)

In [None]:
# Evaluate on the held-out test split.
# NOTE(review): presumably mean accuracy, following the scikit-learn classifier
# convention - confirm against the sktime documentation.
clf.score(test_items, test_labels)

0.6381443298969072

In [None]:
# Average wall-clock latency (in seconds) of predicting one sample at a time.
# time.perf_counter() is used because it is a monotonic, high-resolution clock
# intended for measuring short durations; time.time() can jump if the system
# clock is adjusted.
n_timed = min(100, len(test_items))  # never time slices past the end of the test set
t = time.perf_counter()
for i in range(n_timed):
    # Slice (rather than index) so the classifier still receives a batch of size 1.
    clf.predict(test_items[i:i + 1])
# max(..., 1) only guards against a division by zero on an empty test set.
print((time.perf_counter() - t) / max(n_timed, 1))

0.18913450002670287


In [None]:
# Predicted class ids for the whole test split; reused by the error-analysis
# cells below.
preds = clf.predict(test_items)

In [None]:
# Per-class misclassification tally, keyed by class id and starting at zero.
wrong_preds = dict.fromkeys(labels_mapping.keys(), 0)

In [None]:
# Count the misclassified test samples per true class.
# The tally is rebuilt here so that re-running this cell starts from zero instead
# of adding to the counts left over from a previous run.
wrong_preds = {l: 0 for l in labels_mapping.keys()}
for true_label, pred in zip(test_labels, preds):
    if pred != true_label:
        wrong_preds[true_label] += 1

In [None]:
# Confusion counts: pred_as[true_id][predicted_id] is the number of test samples
# of class `true_id` that the classifier labelled as `predicted_id`.
label_ids = list(labels_mapping.keys())
pred_as = {true_id: dict.fromkeys(label_ids, 0) for true_id in label_ids}
for true_id, predicted_id in zip(test_labels, preds):
    pred_as[true_id][predicted_id] += 1

In [None]:
# Report every class that has at least one misclassified test sample, as
# "<label>: <number of errors>".
misclassified = ((l, wp) for l, wp in wrong_preds.items() if wp > 0)
for l, wp in misclassified:
    print(f"{labels_mapping[l]}: {wp}")

!: 6
": 2
$: 2
': 7
(: 7
): 10
,: 12
-: 6
.: 9
0: 3
1: 13
2: 9
3: 3
4: 7
5: 5
6: 7
7: 7
8: 7
9: 5
:: 3
;: 5
<: 7
>: 3
?: 1
@: 2
A: 4
Aacute: 8
B: 3
C: 14
D: 6
E: 10
Eacute: 3
F: 3
G: 6
H: 4
I: 17
Iacute: 10
J: 9
K: 11
L: 7
M: 5
N: 8
Ntilde: 7
O: 11
Oacute: 10
P: 11
Q: 6
R: 4
S: 12
T: 9
U: 16
Uacute: 8
Uuml: 6
V: 11
W: 3
X: 12
Y: 10
Z: 9
a: 1
aacute: 6
b: 9
c: 15
d: 2
e: 5
eacute: 1
euro: 5
f: 7
g: 7
h: 3
i: 8
iacute: 12
iexcl: 10
iquest: 4
j: 5
k: 15
l: 8
n: 7
ntilde: 3
o: 14
oacute: 11
ordf: 4
ordm: 3
p: 8
q: 3
r: 6
s: 16
t: 9
u: 9
uacute: 5
v: 18
w: 9
x: 8
y: 11
z: 14


In [None]:
# List the frequent confusions: (true class, predicted class) pairs that differ
# and occur more than twice in the test split.
for l, l_preds in pred_as.items():
    for pred_l, count in l_preds.items():
        # Skip correct predictions and confusions seen at most twice.
        if pred_l == l or count <= 2:
            continue
        print(f"{labels_mapping[l]} as {labels_mapping[pred_l]}: {count}")

! as ?: 4
) as (: 3
, as ': 5
. as -: 3
1 as l: 3
6 as G: 5
9 as 4: 3
Aacute as Eacute: 3
Aacute as Iacute: 3
C as <: 3
C as c: 5
Iacute as Eacute: 3
K as k: 5
M as m: 3
N as M: 3
Ntilde as ntilde: 5
O as o: 6
Oacute as oacute: 7
P as p: 3
S as s: 5
U as V: 3
Uacute as oacute: 3
Uuml as uuml: 5
X as x: 7
Z as z: 4
aacute as uacute: 5
c as C: 5
c as e: 4
i as eacute: 3
iacute as eacute: 6
iexcl as i: 3
k as K: 9
o as O: 4
p as P: 4
u as n: 4
v as V: 4
v as w: 3
w as W: 7
x as X: 6
y as f: 3
z as Z: 11
