In [1]:
# Imports
from aalpy.learning_algs import run_Lstar
from aalpy.oracles import StatePrefixEqOracle
from aalpy.utils import save_automaton_to_file

from RNN_SULs import RnnBinarySUL
from TrainAndExtract import train_RNN_on_tomita_grammar

In [3]:
# Train an RNN on the Tomita 3 grammar; the automaton is extracted from it in later cells.
# acc_stop / loss_stop are presumably the stopping thresholds of the training helper, and
# train=True presumably requests fresh training rather than loading a saved network
# (TODO confirm in TrainAndExtract).
rnn = train_RNN_on_tomita_grammar(
    tomita_grammar=3,
    acc_stop=1.0,
    loss_stop=0.005,
    train=True,
)

Starting train
Epoch 0: Accuracy 0.51596, Avg. Loss 12.49723 Validation Accuracy 0.52663
Epoch 1: Accuracy 0.54226, Avg. Loss 12.35145 Validation Accuracy 0.5503
Epoch 2: Accuracy 0.64096, Avg. Loss 11.92443 Validation Accuracy 0.6071
Epoch 3: Accuracy 0.71483, Avg. Loss 11.0601 Validation Accuracy 0.70414
Epoch 4: Accuracy 0.84427, Avg. Loss 8.7699 Validation Accuracy 0.83669
Epoch 5: Accuracy 0.9409, Avg. Loss 4.33512 Validation Accuracy 0.93254
Epoch 6: Accuracy 0.99025, Avg. Loss 1.76336 Validation Accuracy 0.98462
Epoch 7: Accuracy 0.99941, Avg. Loss 0.33405 Validation Accuracy 1.0
Epoch 8: Accuracy 1.0, Avg. Loss 0.06027 Validation Accuracy 1.0
Epoch 9: Accuracy 1.0, Avg. Loss 0.02952 Validation Accuracy 1.0
Epoch 10: Accuracy 1.0, Avg. Loss 0.0194 Validation Accuracy 1.0
Done training!


In [5]:
# Tomita grammars are defined over the binary alphabet: ["0", "1"].
tomita_alphabet = list("01")

# Expose the trained RNN through the SUL wrapper class so the learner can query it.
sul = RnnBinarySUL(rnn)

# Coverage-based equivalence oracle.
# walk_len is deliberately short so that the extracted model conforms to the Tomita 3 grammar
# and the oracle does not find further adversarial inputs that would make the model bigger.
# To see how adversarial inputs can be found, use walks_per_state=500, walk_len=25 instead.
state_eq_oracle = StatePrefixEqOracle(
    tomita_alphabet,
    sul,
    walks_per_state=200,
    walk_len=6,
)

In [6]:
# Run L* against the RNN-backed SUL to learn a DFA.
# print_level=3 yields the per-hypothesis observation tables and counterexamples seen in the output.
learned_model = run_Lstar(
    alphabet=tomita_alphabet,
    sul=sul,
    eq_oracle=state_eq_oracle,
    automaton_type='dfa',
    cache_and_non_det_check=True,
    max_learning_rounds=10,
    print_level=3,
)

Hypothesis 1: 1 states.
---------------------
Prefixes / E set |() 
---------------------
()               |1  
---------------------
('0',)           |1  
---------------------
('1',)           |1  
---------------------
Counterexample ('0', '1', '0')
Hypothesis 2: 4 states.
---------------------------------
Prefixes / E set     |() |('0',) 
---------------------------------
()                   |1  |1      
---------------------------------
('1',)               |1  |0      
---------------------------------
('1', '0')           |0  |1      
---------------------------------
('1', '0', '1')      |0  |0      
---------------------------------
('0',)               |1  |1      
---------------------------------
('1', '1')           |1  |1      
---------------------------------
('1', '0', '0')      |1  |0      
---------------------------------
('1', '0', '1', '0') |0  |0      
---------------------------------
('1', '0', '1', '1') |0  |0      
---------------------------------
Counterex

In [7]:
# Optional: uncomment the next line to save the extracted automaton to a file.
# save_automaton_to_file(learned_model, f'RNN_Models/tomita{3}')

# Print the extracted model; the output below is its Graphviz DOT description.
print(learned_model)

digraph learnedModel {
s0 [label=s0, shape=doublecircle];
s1 [label=s1, shape=doublecircle];
s2 [label=s2];
s3 [label=s3];
s4 [label=s4, shape=doublecircle];
s0 -> s0  [label=0];
s0 -> s1  [label=1];
s1 -> s2  [label=0];
s1 -> s0  [label=1];
s2 -> s4  [label=0];
s2 -> s3  [label=1];
s3 -> s3  [label=0];
s3 -> s3  [label=1];
s4 -> s2  [label=0];
s4 -> s4  [label=1];
__start0 [label="", shape=none];
__start0 -> s0  [label=""];
}

