In [1]:
import string

import numpy as np
import deap
import deap.tools  # submodule is not loaded by "import deap" alone; import it explicitly

import hmm
from discriminator import Discriminator
from ea import EA

Using TensorFlow backend.


In [2]:
# Dimensions of the target model: `x` is later passed to the EA as the number
# of states and `len(y)` as the number of symbols.
x = 10
y = string.ascii_lowercase

# Vector with 1.0 in the first slot and 0.0 elsewhere
# (presumably the initial-state distribution — confirm against hmm.random_hmm).
s = [0.0] * x
s[0] = 1.0

# NOTE(review): no RNG seed is set anywhere in the visible notebook, so unless
# hmm.random_hmm seeds internally this model will differ between runs.
real_hmm = hmm.random_hmm(x, y, s)

In [3]:
# Render the target model's `a` matrix (labelled T) and `b` matrix (labelled E)
# into one text block, then emit it.
real_hmm_summary = f"""
Real HMM:
T:
{real_hmm.a}
E:
{real_hmm.b}
"""
print(real_hmm_summary)


Real HMM:
T:
[[0.01613599 0.07605661 0.07977359 0.01325312 0.18271868 0.01227868
  0.25246749 0.10679691 0.05143253 0.2090864 ]
 [0.17138024 0.21304859 0.04654616 0.10059566 0.0395872  0.11052251
  0.05207399 0.12767953 0.00105991 0.13750621]
 [0.08354753 0.03432421 0.04501804 0.02254256 0.13400041 0.11081435
  0.04167785 0.34086841 0.02179297 0.16541366]
 [0.45690813 0.05499688 0.03596126 0.0131426  0.07020599 0.00308714
  0.07722198 0.01536148 0.14627983 0.12683471]
 [0.01371559 0.01351583 0.35242209 0.15962979 0.08921682 0.19280197
  0.06817797 0.0182334  0.07262734 0.0196592 ]
 [0.0612682  0.00313462 0.20471284 0.03352784 0.05435117 0.03141101
  0.07835612 0.18324924 0.15924887 0.19074009]
 [0.20403439 0.12112357 0.04214804 0.04966981 0.24027814 0.1822247
  0.03651957 0.09501293 0.01910507 0.00988377]
 [0.04415166 0.25996119 0.0173785  0.1986834  0.11933417 0.21940882
  0.05052952 0.03745118 0.02544786 0.02765371]
 [0.01642509 0.0689291  0.17423644 0.01361156 0.06510467 0.10919837

In [4]:
# Discriminator training configuration.
seq_len = 20
batch_size = 200
epoch_size = 500
# epoch_size * batch_size = 100,000 sequences per epoch
epochs = 10

# Arguments are passed positionally in the order the Discriminator constructor
# expects; only pool_size is given by keyword.
discriminator = Discriminator(
    real_hmm,
    epoch_size,
    batch_size,
    seq_len,
    pool_size=8,
)

In [5]:
# Train the discriminator for `epochs` epochs before the evolutionary search
# below is constructed and run.
discriminator.initial_train(epochs)

Epoch 1/10
Epoch 2/10
Epoch 1/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 5/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 10/10


In [6]:
# Number of candidate individuals handed to the evolutionary algorithm.
pop_size = 100

# The search uses the same state count and alphabet size as the real model.
ea = EA(
    discriminator,
    pop_size,
    states=x,
    symbols=len(y),
)

Discriminator metrics: ['loss', 'accuracy']


In [7]:
# Run the evolutionary search. cleanup() sits in `finally` so that it is still
# called when run() raises or the cell is interrupted part-way through;
# previously any failure in run() skipped cleanup entirely.
# (What cleanup() releases is defined in the ea module — presumably worker
# resources; confirm there.)
try:
    final_pop, hall_of_fame = ea.run()
finally:
    ea.cleanup()

gen	nevals	avg     	min 
0  	100   	0.710638	0.54
1  	125   	0.7013  	0.605
2  	125   	0.68665 	0.57 
3  	125   	0.68115 	0.59 
4  	125   	0.6887  	0.6  
5  	125   	0.67325 	0.555
6  	125   	0.66485 	0.6  
7  	125   	0.66395 	0.625
8  	125   	0.65225 	0.59 
9  	125   	0.6628  	0.585
10 	125   	0.66155 	0.615
11 	125   	0.6629  	0.6  
12 	125   	0.66095 	0.615
13 	125   	0.6788  	0.585
14 	125   	0.6516  	0.565
15 	125   	0.642   	0.565
16 	125   	0.67105 	0.625
17 	125   	0.6591  	0.585
18 	125   	0.6649  	0.585
19 	125   	0.6556  	0.595
20 	125   	0.669   	0.61 
21 	125   	0.67525 	0.61 
22 	125   	0.6632  	0.58 
23 	125   	0.66685 	0.58 
24 	125   	0.67555 	0.605
25 	125   	0.6638  	0.61 
26 	125   	0.68825 	0.63 
27 	125   	0.6802  	0.6  
28 	125   	0.6703  	0.605
29 	125   	0.6677  	0.62 
30 	125   	0.66085 	0.615
31 	125   	0.67685 	0.625
32 	125   	0.6825  	0.65 
33 	125   	0.67645 	0.6  
34 	125   	0.68755 	0.635
35 	125   	0.68885 	0.635
36 	125   	0.68185 	0.57 
37 	125   	0.6

In [8]:
# Take the single best individual from the final population and split it into
# its two components.
best_ind = deap.tools.selBest(final_pop, 1)[0]
ind_hmm_a, ind_hmm_b = best_ind

# Rebuild an HMM from those components, reusing the real model's state count,
# alphabet and `s` vector.
ind_hmm = hmm.HMM(
    x,
    np.array(list(y)),
    ind_hmm_a,
    ind_hmm_b,
    np.array(s),
)

In [9]:
# Take the single best individual recorded in the hall of fame. Only element
# [0] is used, so request k=1 (matching how the final population's best is
# selected) instead of selecting ten and discarding nine; the chosen
# individual is the same.
hof_best_ind = deap.tools.selBest(hall_of_fame, 1)[0]
hof_hmm_a, hof_hmm_b = hof_best_ind

# Rebuild an HMM from its two components, reusing the real model's state
# count, alphabet and `s` vector.
hof_hmm = hmm.HMM(x, np.array(list(y)), hof_hmm_a, hof_hmm_b, np.array(s))

In [10]:
# Side-by-side dump of the `a` (T) and `b` (E) matrices for the real model,
# the best individual of the final population, and the hall-of-fame best.
hmm_comparison = f"""
Real HMM:
T:
{real_hmm.a}
E:
{real_hmm.b}

Best HMM:
T:
{ind_hmm.a}
E:
{ind_hmm.b}

HoF HMM:
T:
{hof_hmm.a}
E:
{hof_hmm.b}
"""
print(hmm_comparison)


Real HMM:
T:
[[0.01613599 0.07605661 0.07977359 0.01325312 0.18271868 0.01227868
  0.25246749 0.10679691 0.05143253 0.2090864 ]
 [0.17138024 0.21304859 0.04654616 0.10059566 0.0395872  0.11052251
  0.05207399 0.12767953 0.00105991 0.13750621]
 [0.08354753 0.03432421 0.04501804 0.02254256 0.13400041 0.11081435
  0.04167785 0.34086841 0.02179297 0.16541366]
 [0.45690813 0.05499688 0.03596126 0.0131426  0.07020599 0.00308714
  0.07722198 0.01536148 0.14627983 0.12683471]
 [0.01371559 0.01351583 0.35242209 0.15962979 0.08921682 0.19280197
  0.06817797 0.0182334  0.07262734 0.0196592 ]
 [0.0612682  0.00313462 0.20471284 0.03352784 0.05435117 0.03141101
  0.07835612 0.18324924 0.15924887 0.19074009]
 [0.20403439 0.12112357 0.04214804 0.04966981 0.24027814 0.1822247
  0.03651957 0.09501293 0.01910507 0.00988377]
 [0.04415166 0.25996119 0.0173785  0.1986834  0.11933417 0.21940882
  0.05052952 0.03745118 0.02544786 0.02765371]
 [0.01642509 0.0689291  0.17423644 0.01361156 0.06510467 0.10919837

In [11]:
# Element-wise differences between the real model and the best individual.
ind_a_diff = real_hmm.a - ind_hmm.a
ind_b_diff = real_hmm.b - ind_hmm.b

# One Euclidean (L2) norm per matrix row.
t_l2 = np.linalg.norm(ind_a_diff, ord=2, axis=1)
e_l2 = np.linalg.norm(ind_b_diff, ord=2, axis=1)

# Per-row norms, their sums, and the hmm module's own aggregate measure.
ind_l2_report = f"""
Trans row L2: {t_l2}
Emiss row L2: {e_l2}

Trans tot L2: {np.sum(t_l2)}
Emiss tot L2: {np.sum(e_l2)}

Total err: {hmm.total_l2_diff(real_hmm, ind_hmm)}
"""
print(ind_l2_report)


Trans row L2: [0.26067372 0.30260934 0.48441242 0.466358   0.38890606 0.29540892
 0.29740986 0.3444194  0.15333503 0.34503011]
Emiss row L2: [0.18027673 0.33031903 0.25762539 0.26404274 0.26897273 0.27313996
 0.34651361 0.24996169 0.17794253 0.25476674]

Trans tot L2: 3.338562867428054
Emiss tot L2: 2.603561144565867

Total err: 5.94212401199392



In [12]:
# Element-wise differences between the real model and the hall-of-fame best.
hof_a_diff = real_hmm.a - hof_hmm.a
hof_b_diff = real_hmm.b - hof_hmm.b

# One Euclidean (L2) norm per matrix row.
hof_t_l2 = np.linalg.norm(hof_a_diff, ord=2, axis=1)
hof_e_l2 = np.linalg.norm(hof_b_diff, ord=2, axis=1)

# Per-row norms, their sums, and the hmm module's own aggregate measure.
hof_l2_report = f"""
Trans row L2: {hof_t_l2}
Emiss row L2: {hof_e_l2}

Trans tot L2: {np.sum(hof_t_l2)}
Emiss tot L2: {np.sum(hof_e_l2)}

Total err: {hmm.total_l2_diff(real_hmm, hof_hmm)}
"""
print(hof_l2_report)


Trans row L2: [0.38870307 0.40227473 0.47748336 0.41611379 0.44093791 0.43874473
 0.26738477 0.42660441 0.291437   0.51914548]
Emiss row L2: [0.22275099 0.19668096 0.26664813 0.33168247 0.31854206 0.20759113
 0.29059098 0.25207325 0.31000714 0.28297783]

Trans tot L2: 4.068829236656049
Emiss tot L2: 2.679544944244655

Total err: 6.748374180900703



In [13]:
# Baseline for comparison: another randomly generated HMM built from the real
# model's own x, y and s attributes.
rand_hmm = hmm.random_hmm(
    real_hmm.x,
    real_hmm.y,
    real_hmm.s,
)

# Show its `a` (T) and `b` (E) matrices.
rand_hmm_summary = f"""
Rand HMM:
T:
{rand_hmm.a}
E:
{rand_hmm.b}
"""
print(rand_hmm_summary)


Rand HMM:
T:
[[2.09049828e-02 6.52291925e-02 4.05466239e-02 7.32663071e-02
  2.23412327e-01 9.51835190e-02 2.89163441e-02 2.90203706e-01
  2.58508095e-02 1.36486188e-01]
 [2.06691940e-02 3.42615489e-02 2.44673193e-01 1.05361323e-02
  1.73828125e-01 1.70833846e-01 6.15676430e-02 1.18295310e-01
  1.23735453e-01 4.15995542e-02]
 [1.27488065e-01 1.75968425e-02 1.11507478e-01 7.33420782e-02
  2.03958649e-01 1.60602342e-02 1.64879061e-02 2.22317007e-01
  3.15586500e-02 1.79683089e-01]
 [1.02288595e-01 1.75037590e-01 1.74616329e-01 8.41940200e-04
  1.55428102e-01 1.07459266e-01 8.19179665e-02 1.52011381e-01
  1.39951614e-02 3.64036701e-02]
 [1.23645974e-01 1.76513732e-02 2.21803676e-01 1.58500371e-01
  1.89767047e-03 1.07704824e-01 6.66839198e-02 4.79481990e-02
  8.41055759e-02 1.70058416e-01]
 [9.72735577e-02 1.39563868e-01 2.52768720e-02 4.00496349e-02
  1.74478375e-01 8.02929064e-02 9.89222905e-02 2.47140540e-01
  2.91065847e-02 6.78953708e-02]
 [3.32475556e-02 1.55417085e-02 1.35898545e-

In [14]:
# Element-wise differences between the real model and the random baseline.
rand_a_diff = real_hmm.a - rand_hmm.a
rand_b_diff = real_hmm.b - rand_hmm.b

# One Euclidean (L2) norm per matrix row.
rand_t_l2 = np.linalg.norm(rand_a_diff, ord=2, axis=1)
rand_e_l2 = np.linalg.norm(rand_b_diff, ord=2, axis=1)

# Per-row norms, their sums, and the hmm module's own aggregate measure.
rand_l2_report = f"""
Trans row L2: {rand_t_l2}
Emiss row L2: {rand_e_l2}

Trans tot L2: {np.sum(rand_t_l2)}
Emiss tot L2: {np.sum(rand_e_l2)}

Total err: {hmm.total_l2_diff(real_hmm, rand_hmm)}
"""
print(rand_l2_report)


Trans row L2: [0.32147765 0.38487312 0.19512166 0.47124296 0.26012933 0.32481074
 0.39108026 0.45204278 0.40708114 0.27337155]
Emiss row L2: [0.26421316 0.29588853 0.30016697 0.27132415 0.21846726 0.27639165
 0.30327946 0.23051402 0.21712177 0.26008369]

Trans tot L2: 3.481231184705751
Emiss tot L2: 2.637450660582121

Total err: 6.118681845287872

